This source file includes the following definitions.
- crmd_ha_msg_filter
- node_alive
- peer_update_callback
- crm_fsa_trigger
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15
16 #include <sys/param.h>
17 #include <string.h>
18
19 #include <crm/crm.h>
20 #include <crm/common/xml.h>
21 #include <crm/cluster.h>
22 #include <crm/cib.h>
23
24 #include <pacemaker-controld.h>
25
26
27 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
28
29 void
30 crmd_ha_msg_filter(xmlNode * msg)
31 {
32 if (AM_I_DC) {
33 const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
34
35 if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
36 const char *from = crm_element_value(msg, PCMK__XA_SRC);
37
38 if (!controld_is_local_node(from)) {
39 int level = LOG_INFO;
40 const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK);
41
42
43 if (controld_globals.fsa_state != S_ELECTION) {
44 ha_msg_input_t new_input;
45
46 level = LOG_WARNING;
47 new_input.msg = msg;
48 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
49 __func__);
50 }
51
52 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
53 goto done;
54 }
55 }
56
57 } else {
58 const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
59
60 if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
61 return;
62 }
63 }
64
65
66 route_message(C_HA_MESSAGE, msg);
67
68 done:
69 controld_trigger_fsa();
70 }
71
72
73
74
75
76
77
78
79
80
81
82 static int
83 node_alive(const pcmk__node_status_t *node)
84 {
85 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
86
87 if (pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
88 return 1;
89 }
90 return -1;
91
92 } else if (pcmk__cluster_is_node_active(node)) {
93
94 return 1;
95
96 } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
97 && !pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
98 pcmk__str_none)) {
99
100 return -1;
101 }
102
103
104 return 0;
105 }
106
107 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
108
109 void
110 peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
111 const void *data)
112 {
113 uint32_t old = 0;
114 bool appeared = FALSE;
115 bool is_remote = pcmk_is_set(node->flags, pcmk__node_status_remote);
116
117 controld_node_pending_timer(node);
118
119
120
121
122
123 if (!is_remote) {
124 controld_set_fsa_input_flags(R_PEER_DATA);
125 }
126
127 if ((type == pcmk__node_update_processes)
128 && pcmk_is_set(node->processes, crm_get_cluster_proc())
129 && !AM_I_DC
130 && !is_remote) {
131
132
133
134
135 xmlNode *query = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD,
136 NULL, CRM_SYSTEM_CRMD, CRM_OP_HELLO,
137 NULL);
138
139 crm_debug("Sending hello to node %" PRIu32 " so that it learns our "
140 "node name",
141 node->cluster_layer_id);
142 pcmk__cluster_send_message(node, pcmk_ipc_controld, query);
143
144 pcmk__xml_free(query);
145 }
146
147 if (node->name == NULL) {
148 return;
149 }
150
151 switch (type) {
152 case pcmk__node_update_name:
153
154 crm_info("%s node %s is now %s",
155 (is_remote? "Remote" : "Cluster"),
156 node->name, state_text(node->state));
157 return;
158
159 case pcmk__node_update_state:
160
161
162
163 CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
164 return);
165
166 crm_info("%s node %s is now %s (was %s)",
167 (is_remote? "Remote" : "Cluster"),
168 node->name, state_text(node->state), state_text(data));
169
170 if (pcmk__str_eq(PCMK_VALUE_MEMBER, node->state, pcmk__str_none)) {
171 appeared = TRUE;
172 if (!is_remote) {
173 remove_stonith_cleanup(node->name);
174 }
175 } else {
176 controld_remove_failed_sync_node(node->name);
177 controld_remove_voter(node->name);
178 }
179
180 crmd_alert_node_event(node);
181 break;
182
183 case pcmk__node_update_processes:
184 CRM_CHECK(data != NULL, return);
185 old = *(const uint32_t *)data;
186 appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
187
188 {
189 const char *dc_s = controld_globals.dc_name;
190
191 if ((dc_s == NULL) && AM_I_DC) {
192 dc_s = PCMK_VALUE_TRUE;
193 }
194
195 crm_info("Node %s is %s a peer " QB_XS
196 " DC=%s old=%#07x new=%#07x",
197 node->name, (appeared? "now" : "no longer"),
198 pcmk__s(dc_s, "<none>"), old, node->processes);
199 }
200
201 if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
202
203
204
205 crm_trace("Process flag %#7x did not change from %#7x to %#7x",
206 crm_get_cluster_proc(), old, node->processes);
207 return;
208
209 }
210
211 if (!appeared) {
212 node->peer_lost = time(NULL);
213 controld_remove_failed_sync_node(node->name);
214 controld_remove_voter(node->name);
215 }
216
217 if (!pcmk_is_set(controld_globals.fsa_input_register,
218 R_CIB_CONNECTED)) {
219 crm_trace("Ignoring peer status change because not connected to CIB");
220 return;
221
222 } else if (controld_globals.fsa_state == S_STOPPING) {
223 crm_trace("Ignoring peer status change because stopping");
224 return;
225 }
226
227 if (!appeared && controld_is_local_node(node->name)) {
228
229 crm_notice("Our peer connection failed");
230 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
231
232 } else if (pcmk__str_eq(node->name, controld_globals.dc_name,
233 pcmk__str_casei)
234 && !pcmk__cluster_is_node_active(node)) {
235
236
237
238
239
240
241
242
243 crm_notice("Our peer on the DC (%s) is dead",
244 controld_globals.dc_name);
245
246 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
247 controld_delete_node_state(node->name, controld_section_attrs,
248 cib_none);
249
250 } else if (AM_I_DC
251 || pcmk_is_set(controld_globals.flags, controld_dc_left)
252 || (controld_globals.dc_name == NULL)) {
253
254
255
256
257 if (appeared) {
258 te_trigger_stonith_history_sync(FALSE);
259 } else {
260 controld_delete_node_state(node->name,
261 controld_section_attrs,
262 cib_none);
263 }
264 }
265 break;
266 }
267
268 if (AM_I_DC) {
269 xmlNode *update = NULL;
270 int flags = node_update_peer;
271 int alive = node_alive(node);
272 pcmk__graph_action_t *down = match_down_event(node->xml_id);
273
274 crm_trace("Alive=%d, appeared=%d, down=%d",
275 alive, appeared, (down? down->id : -1));
276
277 if (appeared && (alive > 0) && !is_remote) {
278 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
279 }
280
281 if (down) {
282 const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION);
283
284 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
285 const bool confirmed =
286 pcmk_is_set(down->flags, pcmk__graph_action_confirmed);
287
288
289 crm_trace("Updating CIB %s fencer reported fencing of %s complete",
290 (confirmed? "after" : "before"), node->name);
291
292 } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
293 pcmk__str_casei)) {
294
295
296 if (!is_remote) {
297 flags |= node_update_join | node_update_expected;
298 crmd_peer_down(node, FALSE);
299 check_join_state(controld_globals.fsa_state, __func__);
300 }
301 if (alive >= 0) {
302 crm_info("%s of peer %s is in progress " QB_XS " action=%d",
303 task, node->name, down->id);
304 } else {
305 crm_notice("%s of peer %s is complete " QB_XS " action=%d",
306 task, node->name, down->id);
307 pcmk__update_graph(controld_globals.transition_graph, down);
308 trigger_graph();
309 }
310
311 } else {
312 const char *liveness = "alive";
313
314 if (alive == 0) {
315 liveness = "partially alive";
316
317 } else if (alive < 0) {
318 liveness = "dead";
319 }
320
321 crm_trace("Node %s is %s, was expected to %s (op %d)",
322 node->name, liveness, task, down->id);
323 }
324
325 } else if (appeared == FALSE) {
326 if ((controld_globals.transition_graph == NULL)
327 || (controld_globals.transition_graph->id <= 0)) {
328 crm_info("Stonith/shutdown of node %s is unknown to the "
329 "current DC", node->name);
330 } else {
331 crm_warn("Stonith/shutdown of node %s was not expected",
332 node->name);
333 }
334 if (!is_remote) {
335 crm_update_peer_join(__func__, node, controld_join_none);
336 check_join_state(controld_globals.fsa_state, __func__);
337 }
338 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
339 "Node failure", NULL);
340 fail_incompletable_actions(controld_globals.transition_graph,
341 node->xml_id);
342
343 } else {
344 crm_trace("Node %s came up, was not expected to be down",
345 node->name);
346 }
347
348 if (is_remote) {
349
350
351
352 flags |= node_update_cluster;
353
354
355 if (appeared) {
356 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
357 "Pacemaker Remote node integrated", NULL);
358 }
359 }
360
361 if (!appeared && (type == pcmk__node_update_processes)
362 && (node->when_member > 1)) {
363
364
365
366
367
368 node->when_member = 1;
369 flags |= node_update_cluster;
370 controld_node_pending_timer(node);
371 }
372
373
374 update = create_node_state_update(node, flags, NULL, __func__);
375 if (update == NULL) {
376 crm_debug("Node state update not yet possible for %s", node->name);
377 } else {
378 fsa_cib_anon_update(PCMK_XE_STATUS, update);
379 }
380 pcmk__xml_free(update);
381 }
382
383 controld_trigger_fsa();
384 }
385
386 gboolean
387 crm_fsa_trigger(gpointer user_data)
388 {
389 crm_trace("Invoked (queue len: %d)",
390 g_list_length(controld_globals.fsa_message_queue));
391 s_crmd_fsa(C_FSA_INTERNAL);
392 crm_trace("Exited (queue len: %d)",
393 g_list_length(controld_globals.fsa_message_queue));
394 return TRUE;
395 }