This source file includes the following definitions.
- crmd_ha_msg_filter
- node_alive
- peer_update_callback
- crm_fsa_trigger
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h>
13 #include <stdbool.h>
14 #include <stdint.h>
15 #include <stdio.h>
16
17 #include <sys/param.h>
18 #include <string.h>
19
20 #include <crm/crm.h>
21 #include <crm/common/xml.h>
22 #include <crm/cluster.h>
23 #include <crm/cib.h>
24
25 #include <pacemaker-controld.h>
26
27
28 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
29
30 void
31 crmd_ha_msg_filter(xmlNode * msg)
32 {
33 if (AM_I_DC) {
34 const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
35
36 if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
37 const char *from = crm_element_value(msg, PCMK__XA_SRC);
38
39 if (!controld_is_local_node(from)) {
40 int level = LOG_INFO;
41 const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK);
42
43
44 if (controld_globals.fsa_state != S_ELECTION) {
45 ha_msg_input_t new_input;
46
47 level = LOG_WARNING;
48 new_input.msg = msg;
49 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
50 __func__);
51 }
52
53 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
54 goto done;
55 }
56 }
57
58 } else {
59 const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
60
61 if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
62 return;
63 }
64 }
65
66
67 route_message(C_HA_MESSAGE, msg);
68
69 done:
70 controld_trigger_fsa();
71 }
72
73
74
75
76
77
78
79
80
81
82
83 static int
84 node_alive(const pcmk__node_status_t *node)
85 {
86 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
87
88 if (pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
89 return 1;
90 }
91 return -1;
92
93 } else if (pcmk__cluster_is_node_active(node)) {
94
95 return 1;
96
97 } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
98 && !pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
99 pcmk__str_none)) {
100
101 return -1;
102 }
103
104
105 return 0;
106 }
107
/* Yield a loggable string for a node state that may be NULL. The argument may
 * be any object pointer (it is cast to const char *). It is evaluated twice,
 * so it must not have side effects.
 */
#define state_text(state) \
    (((state) != NULL)? (const char *) (state) : "in unknown state")
109
110
111 void
112 peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
113 const void *data)
114 {
115 uint32_t old = 0;
116 bool appeared = FALSE;
117 bool is_remote = pcmk_is_set(node->flags, pcmk__node_status_remote);
118
119 controld_node_pending_timer(node);
120
121
122
123
124
125 if (!is_remote) {
126 controld_set_fsa_input_flags(R_PEER_DATA);
127 }
128
129 if ((type == pcmk__node_update_processes)
130 && pcmk_is_set(node->processes, crm_get_cluster_proc())
131 && !AM_I_DC
132 && !is_remote) {
133
134
135
136 xmlNode *query = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD,
137 NULL, CRM_SYSTEM_CRMD, CRM_OP_HELLO,
138 NULL);
139
140 crm_debug("Sending hello to node %" PRIu32 " so that it learns our "
141 "node name",
142 node->cluster_layer_id);
143 pcmk__cluster_send_message(node, pcmk_ipc_controld, query);
144 pcmk__xml_free(query);
145 }
146
147 if (node->name == NULL) {
148 return;
149 }
150
151 switch (type) {
152 case pcmk__node_update_name:
153
154 crm_info("%s node %s is now %s",
155 (is_remote? "Remote" : "Cluster"),
156 node->name, state_text(node->state));
157 return;
158
159 case pcmk__node_update_state:
160
161
162
163 CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
164 return);
165
166 crm_info("%s node %s is now %s (was %s)",
167 (is_remote? "Remote" : "Cluster"),
168 node->name, state_text(node->state), state_text(data));
169
170 if (pcmk__str_eq(PCMK_VALUE_MEMBER, node->state, pcmk__str_none)) {
171 appeared = TRUE;
172 if (!is_remote) {
173 remove_stonith_cleanup(node->name);
174 }
175 } else {
176 controld_remove_failed_sync_node(node->name);
177 controld_remove_voter(node->name);
178 }
179
180 crmd_alert_node_event(node);
181 break;
182
183 case pcmk__node_update_processes:
184 CRM_CHECK(data != NULL, return);
185 old = *(const uint32_t *)data;
186 appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
187
188 {
189 const char *dc_s = controld_globals.dc_name;
190
191 if ((dc_s == NULL) && AM_I_DC) {
192 dc_s = PCMK_VALUE_TRUE;
193 }
194
195 crm_info("Node %s is %s a peer " QB_XS
196 " DC=%s old=%#07x new=%#07x",
197 node->name, (appeared? "now" : "no longer"),
198 pcmk__s(dc_s, "<none>"), old, node->processes);
199 }
200
201 if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
202
203
204
205 crm_trace("Process flag %#7x did not change from %#7x to %#7x",
206 crm_get_cluster_proc(), old, node->processes);
207 return;
208
209 }
210
211 if (!appeared) {
212 node->peer_lost = time(NULL);
213 controld_remove_failed_sync_node(node->name);
214 controld_remove_voter(node->name);
215 }
216
217 if (!pcmk_is_set(controld_globals.fsa_input_register,
218 R_CIB_CONNECTED)) {
219 crm_trace("Ignoring peer status change because not connected to CIB");
220 return;
221
222 } else if (controld_globals.fsa_state == S_STOPPING) {
223 crm_trace("Ignoring peer status change because stopping");
224 return;
225 }
226
227 if (!appeared && controld_is_local_node(node->name)) {
228
229 crm_notice("Our peer connection failed");
230 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
231
232 } else if (pcmk__str_eq(node->name, controld_globals.dc_name,
233 pcmk__str_casei)
234 && !pcmk__cluster_is_node_active(node)) {
235
236
237
238
239
240
241
242
243 crm_notice("Our peer on the DC (%s) is dead",
244 controld_globals.dc_name);
245
246 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
247 controld_delete_node_state(node->name, controld_section_attrs,
248 cib_none);
249
250 } else if (AM_I_DC
251 || pcmk_is_set(controld_globals.flags, controld_dc_left)
252 || (controld_globals.dc_name == NULL)) {
253
254
255
256
257 if (appeared) {
258 te_trigger_stonith_history_sync(FALSE);
259 } else {
260 controld_delete_node_state(node->name,
261 controld_section_attrs,
262 cib_none);
263 }
264 }
265 break;
266 }
267
268 if (AM_I_DC) {
269 xmlNode *update = NULL;
270 uint32_t flags = controld_node_update_peer;
271 int alive = node_alive(node);
272 pcmk__graph_action_t *down = match_down_event(node->xml_id);
273
274 crm_trace("Alive=%d, appeared=%d, down=%d",
275 alive, appeared, (down? down->id : -1));
276
277 if (appeared && (alive > 0) && !is_remote) {
278 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
279 }
280
281 if (down) {
282 const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION);
283
284 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
285 const bool confirmed =
286 pcmk_is_set(down->flags, pcmk__graph_action_confirmed);
287
288
289 crm_trace("Updating CIB %s fencer reported fencing of %s complete",
290 (confirmed? "after" : "before"), node->name);
291
292 } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
293 pcmk__str_casei)) {
294
295
296 if (!is_remote) {
297 flags |= controld_node_update_join
298 |controld_node_update_expected;
299 crmd_peer_down(node, FALSE);
300 check_join_state(controld_globals.fsa_state, __func__);
301 }
302 if (alive >= 0) {
303 crm_info("%s of peer %s is in progress " QB_XS " action=%d",
304 task, node->name, down->id);
305 } else {
306 crm_notice("%s of peer %s is complete " QB_XS " action=%d",
307 task, node->name, down->id);
308 pcmk__update_graph(controld_globals.transition_graph, down);
309 trigger_graph();
310 }
311
312 } else {
313 const char *liveness = "alive";
314
315 if (alive == 0) {
316 liveness = "partially alive";
317
318 } else if (alive < 0) {
319 liveness = "dead";
320 }
321
322 crm_trace("Node %s is %s, was expected to %s (op %d)",
323 node->name, liveness, task, down->id);
324 }
325
326 } else if (appeared == FALSE) {
327 if ((controld_globals.transition_graph == NULL)
328 || (controld_globals.transition_graph->id <= 0)) {
329 crm_info("Stonith/shutdown of node %s is unknown to the "
330 "current DC", node->name);
331 } else {
332 crm_warn("Stonith/shutdown of node %s was not expected",
333 node->name);
334 }
335 if (!is_remote) {
336 crm_update_peer_join(__func__, node, controld_join_none);
337 check_join_state(controld_globals.fsa_state, __func__);
338 }
339 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
340 "Node failure", NULL);
341 fail_incompletable_actions(controld_globals.transition_graph,
342 node->xml_id);
343
344 } else {
345 crm_trace("Node %s came up, was not expected to be down",
346 node->name);
347 }
348
349 if (is_remote) {
350
351
352
353 flags |= controld_node_update_cluster;
354
355
356 if (appeared) {
357 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
358 "Pacemaker Remote node integrated", NULL);
359 }
360 }
361
362 if (!appeared && (type == pcmk__node_update_processes)
363 && (node->when_member > 1)) {
364
365
366
367
368
369 node->when_member = 1;
370 flags |= controld_node_update_cluster;
371 controld_node_pending_timer(node);
372 }
373
374
375 update = create_node_state_update(node, flags, NULL, __func__);
376 if (update == NULL) {
377 crm_debug("Node state update not yet possible for %s", node->name);
378 } else {
379 fsa_cib_anon_update(PCMK_XE_STATUS, update);
380 }
381 pcmk__xml_free(update);
382 }
383
384 controld_trigger_fsa();
385 }
386
387 gboolean
388 crm_fsa_trigger(gpointer user_data)
389 {
390 crm_trace("Invoked (queue len: %d)",
391 g_list_length(controld_globals.fsa_message_queue));
392 s_crmd_fsa(C_FSA_INTERNAL);
393 crm_trace("Exited (queue len: %d)",
394 g_list_length(controld_globals.fsa_message_queue));
395 return TRUE;
396 }