This source file includes the following definitions.
- crmd_ha_msg_filter
- node_alive
- peer_update_callback
- crm_fsa_trigger
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19 #include <crm/cib.h>
20
21 #include <pacemaker-controld.h>
22
23
24 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
25
26 void
27 crmd_ha_msg_filter(xmlNode * msg)
28 {
29 if (AM_I_DC) {
30 const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
31
32 if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
33 const char *from = crm_element_value(msg, F_ORIG);
34
35 if (!pcmk__str_eq(from, controld_globals.our_nodename,
36 pcmk__str_casei)) {
37 int level = LOG_INFO;
38 const char *op = crm_element_value(msg, F_CRM_TASK);
39
40
41 if (controld_globals.fsa_state != S_ELECTION) {
42 ha_msg_input_t new_input;
43
44 level = LOG_WARNING;
45 new_input.msg = msg;
46 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
47 __func__);
48 }
49
50 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
51 goto done;
52 }
53 }
54
55 } else {
56 const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
57
58 if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
59 return;
60 }
61 }
62
63
64 route_message(C_HA_MESSAGE, msg);
65
66 done:
67 controld_trigger_fsa();
68 }
69
70
71
72
73
74
75
76
77
78
79
80 static int
81 node_alive(const crm_node_t *node)
82 {
83 if (pcmk_is_set(node->flags, crm_remote_node)) {
84
85 return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1;
86
87 } else if (crm_is_peer_active(node)) {
88
89 return 1;
90
91 } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
92 && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
93
94 return -1;
95 }
96
97
98 return 0;
99 }
100
/* Printable form of a node state for log messages: the state itself viewed
 * as a C string, or a fixed placeholder when the pointer is NULL. The
 * argument may be evaluated twice, so it must not have side effects.
 */
#define state_text(state) \
    (((state) != NULL)? (const char *) (state) : "in unknown state")
102
103 void
104 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
105 {
106 uint32_t old = 0;
107 bool appeared = FALSE;
108 bool is_remote = pcmk_is_set(node->flags, crm_remote_node);
109
110 controld_node_pending_timer(node);
111
112
113
114
115
116 if (!is_remote) {
117 controld_set_fsa_input_flags(R_PEER_DATA);
118 }
119
120 if (type == crm_status_processes
121 && pcmk_is_set(node->processes, crm_get_cluster_proc())
122 && !AM_I_DC
123 && !is_remote) {
124
125
126
127
128 xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
129
130 crm_debug("Sending hello to node %u so that it learns our node name", node->id);
131 send_cluster_message(node, crm_msg_crmd, query, FALSE);
132
133 free_xml(query);
134 }
135
136 if (node->uname == NULL) {
137 return;
138 }
139
140 switch (type) {
141 case crm_status_uname:
142
143 crm_info("%s node %s is now %s",
144 (is_remote? "Remote" : "Cluster"),
145 node->uname, state_text(node->state));
146 return;
147
148 case crm_status_nstate:
149
150
151
152 CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
153 return);
154
155 crm_info("%s node %s is now %s (was %s)",
156 (is_remote? "Remote" : "Cluster"),
157 node->uname, state_text(node->state), state_text(data));
158
159 if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) {
160 appeared = TRUE;
161 if (!is_remote) {
162 remove_stonith_cleanup(node->uname);
163 }
164 } else {
165 controld_remove_failed_sync_node(node->uname);
166 controld_remove_voter(node->uname);
167 }
168
169 crmd_alert_node_event(node);
170 break;
171
172 case crm_status_processes:
173 CRM_CHECK(data != NULL, return);
174 old = *(const uint32_t *)data;
175 appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
176
177 {
178 const char *dc_s = controld_globals.dc_name;
179
180 if ((dc_s == NULL) && AM_I_DC) {
181 dc_s = "true";
182 }
183
184 crm_info("Node %s is %s a peer " CRM_XS
185 " DC=%s old=%#07x new=%#07x",
186 node->uname, (appeared? "now" : "no longer"),
187 pcmk__s(dc_s, "<none>"), old, node->processes);
188 }
189
190 if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
191
192
193
194 crm_trace("Process flag %#7x did not change from %#7x to %#7x",
195 crm_get_cluster_proc(), old, node->processes);
196 return;
197
198 }
199
200 if (!appeared) {
201 node->peer_lost = time(NULL);
202 controld_remove_failed_sync_node(node->uname);
203 controld_remove_voter(node->uname);
204 }
205
206 if (!pcmk_is_set(controld_globals.fsa_input_register,
207 R_CIB_CONNECTED)) {
208 crm_trace("Ignoring peer status change because not connected to CIB");
209 return;
210
211 } else if (controld_globals.fsa_state == S_STOPPING) {
212 crm_trace("Ignoring peer status change because stopping");
213 return;
214 }
215
216 if (!appeared
217 && pcmk__str_eq(node->uname, controld_globals.our_nodename,
218 pcmk__str_casei)) {
219
220 crm_notice("Our peer connection failed");
221 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
222
223 } else if (pcmk__str_eq(node->uname, controld_globals.dc_name,
224 pcmk__str_casei)
225 && !crm_is_peer_active(node)) {
226
227 crm_notice("Our peer on the DC (%s) is dead",
228 controld_globals.dc_name);
229 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
230
231
232
233
234
235
236
237
238
239 if (compare_version(controld_globals.dc_version, "3.0.9") > 0) {
240 controld_delete_node_state(node->uname,
241 controld_section_attrs,
242 cib_scope_local);
243 }
244
245 } else if (AM_I_DC
246 || pcmk_is_set(controld_globals.flags, controld_dc_left)
247 || (controld_globals.dc_name == NULL)) {
248
249
250
251
252 if (appeared) {
253 te_trigger_stonith_history_sync(FALSE);
254 } else {
255 controld_delete_node_state(node->uname,
256 controld_section_attrs,
257 cib_scope_local);
258 }
259 }
260 break;
261 }
262
263 if (AM_I_DC) {
264 xmlNode *update = NULL;
265 int flags = node_update_peer;
266 int alive = node_alive(node);
267 pcmk__graph_action_t *down = match_down_event(node->uuid);
268
269 crm_trace("Alive=%d, appeared=%d, down=%d",
270 alive, appeared, (down? down->id : -1));
271
272 if (appeared && (alive > 0) && !is_remote) {
273 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
274 }
275
276 if (down) {
277 const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
278
279 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
280
281
282 crm_trace("Updating CIB %s fencer reported fencing of %s complete",
283 (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname);
284
285 } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
286 pcmk__str_casei)) {
287
288
289 if (!is_remote) {
290 flags |= node_update_join | node_update_expected;
291 crmd_peer_down(node, FALSE);
292 check_join_state(controld_globals.fsa_state, __func__);
293 }
294 if (alive >= 0) {
295 crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
296 task, node->uname, down->id);
297 } else {
298 crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
299 task, node->uname, down->id);
300 pcmk__update_graph(controld_globals.transition_graph, down);
301 trigger_graph();
302 }
303
304 } else {
305 crm_trace("Node %s is %s, was expected to %s (op %d)",
306 node->uname,
307 ((alive > 0)? "alive" :
308 ((alive < 0)? "dead" : "partially alive")),
309 task, down->id);
310 }
311
312 } else if (appeared == FALSE) {
313 if ((controld_globals.transition_graph == NULL)
314 || (controld_globals.transition_graph->id == -1)) {
315 crm_info("Stonith/shutdown of node %s is unknown to the "
316 "current DC", node->uname);
317 } else {
318 crm_warn("Stonith/shutdown of node %s was not expected",
319 node->uname);
320 }
321 if (!is_remote) {
322 crm_update_peer_join(__func__, node, crm_join_none);
323 check_join_state(controld_globals.fsa_state, __func__);
324 }
325 abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
326 NULL);
327 fail_incompletable_actions(controld_globals.transition_graph,
328 node->uuid);
329
330 } else {
331 crm_trace("Node %s came up, was not expected to be down",
332 node->uname);
333 }
334
335 if (is_remote) {
336
337
338
339 flags |= node_update_cluster;
340
341
342 if (appeared) {
343 abort_transition(INFINITY, pcmk__graph_restart,
344 "Pacemaker Remote node integrated", NULL);
345 }
346 }
347
348 if (!appeared && (type == crm_status_processes)
349 && (node->when_member > 1)) {
350
351
352
353
354 node->when_member = 1;
355 flags |= node_update_cluster;
356 controld_node_pending_timer(node);
357 }
358
359
360 update = create_node_state_update(node, flags, NULL, __func__);
361 if (update == NULL) {
362 crm_debug("Node state update not yet possible for %s", node->uname);
363 } else {
364 fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
365 }
366 free_xml(update);
367 }
368
369 controld_trigger_fsa();
370 }
371
372 gboolean
373 crm_fsa_trigger(gpointer user_data)
374 {
375 crm_trace("Invoked (queue len: %d)",
376 g_list_length(controld_globals.fsa_message_queue));
377 s_crmd_fsa(C_FSA_INTERNAL);
378 crm_trace("Exited (queue len: %d)",
379 g_list_length(controld_globals.fsa_message_queue));
380 return TRUE;
381 }