This source file includes the following definitions.
- crmd_ha_msg_filter
- node_alive
- peer_update_callback
- crm_fsa_trigger
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19 #include <crm/cib.h>
20
21 #include <pacemaker-controld.h>
22
23
24 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
25
26 void
27 crmd_ha_msg_filter(xmlNode * msg)
28 {
29 if (AM_I_DC) {
30 const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
31
32 if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
33 const char *from = crm_element_value(msg, F_ORIG);
34
35 if (!pcmk__str_eq(from, controld_globals.our_nodename,
36 pcmk__str_casei)) {
37 int level = LOG_INFO;
38 const char *op = crm_element_value(msg, F_CRM_TASK);
39
40
41 if (controld_globals.fsa_state != S_ELECTION) {
42 ha_msg_input_t new_input;
43
44 level = LOG_WARNING;
45 new_input.msg = msg;
46 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
47 __func__);
48 }
49
50 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
51 goto done;
52 }
53 }
54
55 } else {
56 const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
57
58 if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
59 return;
60 }
61 }
62
63
64 route_message(C_HA_MESSAGE, msg);
65
66 done:
67 controld_trigger_fsa();
68 }
69
70
71
72
73
74
75
76
77
78
79
80 static int
81 node_alive(const crm_node_t *node)
82 {
83 if (pcmk_is_set(node->flags, crm_remote_node)) {
84
85 return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1;
86
87 } else if (crm_is_peer_active(node)) {
88
89 return 1;
90
91 } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
92 && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
93
94 return -1;
95 }
96
97
98 return 0;
99 }
100
101 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
102
103 void
104 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
105 {
106 uint32_t old = 0;
107 bool appeared = FALSE;
108 bool is_remote = pcmk_is_set(node->flags, crm_remote_node);
109
110
111
112
113
114 if (!is_remote) {
115 controld_set_fsa_input_flags(R_PEER_DATA);
116 }
117
118 if (type == crm_status_processes
119 && pcmk_is_set(node->processes, crm_get_cluster_proc())
120 && !AM_I_DC
121 && !is_remote) {
122
123
124
125
126 xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
127
128 crm_debug("Sending hello to node %u so that it learns our node name", node->id);
129 send_cluster_message(node, crm_msg_crmd, query, FALSE);
130
131 free_xml(query);
132 }
133
134 if (node->uname == NULL) {
135 return;
136 }
137
138 switch (type) {
139 case crm_status_uname:
140
141 crm_info("%s node %s is now %s",
142 (is_remote? "Remote" : "Cluster"),
143 node->uname, state_text(node->state));
144 return;
145
146 case crm_status_nstate:
147
148
149
150 CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
151 return);
152
153 crm_info("%s node %s is now %s (was %s)",
154 (is_remote? "Remote" : "Cluster"),
155 node->uname, state_text(node->state), state_text(data));
156
157 if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) {
158 appeared = TRUE;
159 if (!is_remote) {
160 remove_stonith_cleanup(node->uname);
161 }
162 } else {
163 controld_remove_failed_sync_node(node->uname);
164 controld_remove_voter(node->uname);
165 }
166
167 crmd_alert_node_event(node);
168 break;
169
170 case crm_status_processes:
171 CRM_CHECK(data != NULL, return);
172 old = *(const uint32_t *)data;
173 appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
174
175 {
176 const char *dc_s = controld_globals.dc_name;
177
178 if ((dc_s == NULL) && AM_I_DC) {
179 dc_s = "true";
180 }
181
182 crm_info("Node %s is %s a peer " CRM_XS
183 " DC=%s old=%#07x new=%#07x",
184 node->uname, (appeared? "now" : "no longer"),
185 pcmk__s(dc_s, "<none>"), old, node->processes);
186 }
187
188 if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
189
190
191
192 crm_trace("Process flag %#7x did not change from %#7x to %#7x",
193 crm_get_cluster_proc(), old, node->processes);
194 return;
195
196 }
197
198 if (!appeared) {
199 node->peer_lost = time(NULL);
200 controld_remove_failed_sync_node(node->uname);
201 controld_remove_voter(node->uname);
202 }
203
204 if (!pcmk_is_set(controld_globals.fsa_input_register,
205 R_CIB_CONNECTED)) {
206 crm_trace("Ignoring peer status change because not connected to CIB");
207 return;
208
209 } else if (controld_globals.fsa_state == S_STOPPING) {
210 crm_trace("Ignoring peer status change because stopping");
211 return;
212 }
213
214 if (!appeared
215 && pcmk__str_eq(node->uname, controld_globals.our_nodename,
216 pcmk__str_casei)) {
217
218 crm_notice("Our peer connection failed");
219 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
220
221 } else if (pcmk__str_eq(node->uname, controld_globals.dc_name,
222 pcmk__str_casei)
223 && !crm_is_peer_active(node)) {
224
225 crm_notice("Our peer on the DC (%s) is dead",
226 controld_globals.dc_name);
227 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
228
229
230
231
232
233
234
235
236
237 if (compare_version(controld_globals.dc_version, "3.0.9") > 0) {
238 controld_delete_node_state(node->uname,
239 controld_section_attrs,
240 cib_scope_local);
241 }
242
243 } else if (AM_I_DC
244 || pcmk_is_set(controld_globals.flags, controld_dc_left)
245 || (controld_globals.dc_name == NULL)) {
246
247
248
249
250 if (appeared) {
251 te_trigger_stonith_history_sync(FALSE);
252 } else {
253 controld_delete_node_state(node->uname,
254 controld_section_attrs,
255 cib_scope_local);
256 }
257 }
258 break;
259 }
260
261 if (AM_I_DC) {
262 xmlNode *update = NULL;
263 int flags = node_update_peer;
264 int alive = node_alive(node);
265 pcmk__graph_action_t *down = match_down_event(node->uuid);
266
267 crm_trace("Alive=%d, appeared=%d, down=%d",
268 alive, appeared, (down? down->id : -1));
269
270 if (appeared && (alive > 0) && !is_remote) {
271 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
272 }
273
274 if (down) {
275 const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
276
277 if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
278
279
280 crm_trace("Updating CIB %s fencer reported fencing of %s complete",
281 (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname);
282
283 } else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
284
285
286 if (!is_remote) {
287 flags |= node_update_join | node_update_expected;
288 crmd_peer_down(node, FALSE);
289 check_join_state(controld_globals.fsa_state, __func__);
290 }
291 if (alive >= 0) {
292 crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
293 task, node->uname, down->id);
294 } else {
295 crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
296 task, node->uname, down->id);
297 pcmk__update_graph(controld_globals.transition_graph, down);
298 trigger_graph();
299 }
300
301 } else {
302 crm_trace("Node %s is %s, was expected to %s (op %d)",
303 node->uname,
304 ((alive > 0)? "alive" :
305 ((alive < 0)? "dead" : "partially alive")),
306 task, down->id);
307 }
308
309 } else if (appeared == FALSE) {
310 if ((controld_globals.transition_graph == NULL)
311 || (controld_globals.transition_graph->id == -1)) {
312 crm_info("Stonith/shutdown of node %s is unknown to the "
313 "current DC", node->uname);
314 } else {
315 crm_warn("Stonith/shutdown of node %s was not expected",
316 node->uname);
317 }
318 if (!is_remote) {
319 crm_update_peer_join(__func__, node, crm_join_none);
320 check_join_state(controld_globals.fsa_state, __func__);
321 }
322 abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
323 NULL);
324 fail_incompletable_actions(controld_globals.transition_graph,
325 node->uuid);
326
327 } else {
328 crm_trace("Node %s came up, was not expected to be down",
329 node->uname);
330 }
331
332 if (is_remote) {
333
334
335
336 flags |= node_update_cluster;
337
338
339 if (appeared) {
340 abort_transition(INFINITY, pcmk__graph_restart,
341 "Pacemaker Remote node integrated", NULL);
342 }
343 }
344
345
346 update = create_node_state_update(node, flags, NULL, __func__);
347 if (update == NULL) {
348 crm_debug("Node state update not yet possible for %s", node->uname);
349 } else {
350 fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
351 }
352 free_xml(update);
353 }
354
355 controld_trigger_fsa();
356 }
357
358 gboolean
359 crm_fsa_trigger(gpointer user_data)
360 {
361 crm_trace("Invoked (queue len: %d)",
362 g_list_length(controld_globals.fsa_message_queue));
363 s_crmd_fsa(C_FSA_INTERNAL);
364 crm_trace("Exited (queue len: %d)",
365 g_list_length(controld_globals.fsa_message_queue));
366 return TRUE;
367 }