This source file includes the following definitions.
- crmd_ha_msg_filter
- node_alive
- peer_update_callback
- crm_fsa_trigger
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/common/xml.h>
17 #include <crm/cluster.h>
18 #include <crm/cib.h>
19
20 #include <pacemaker-controld.h>
21
22
23 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
24
25 void
26 crmd_ha_msg_filter(xmlNode * msg)
27 {
28 if (AM_I_DC) {
29 const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
30
31 if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
32 const char *from = crm_element_value(msg, PCMK__XA_SRC);
33
34 if (!pcmk__str_eq(from, controld_globals.our_nodename,
35 pcmk__str_casei)) {
36 int level = LOG_INFO;
37 const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK);
38
39
40 if (controld_globals.fsa_state != S_ELECTION) {
41 ha_msg_input_t new_input;
42
43 level = LOG_WARNING;
44 new_input.msg = msg;
45 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
46 __func__);
47 }
48
49 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
50 goto done;
51 }
52 }
53
54 } else {
55 const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
56
57 if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
58 return;
59 }
60 }
61
62
63 route_message(C_HA_MESSAGE, msg);
64
65 done:
66 controld_trigger_fsa();
67 }
68
69
70
71
72
73
74
75
76
77
78
79 static int
80 node_alive(const crm_node_t *node)
81 {
82 if (pcmk_is_set(node->flags, crm_remote_node)) {
83
84 return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1;
85
86 } else if (pcmk__cluster_is_node_active(node)) {
87
88 return 1;
89
90 } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
91 && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
92
93 return -1;
94 }
95
96
97 return 0;
98 }
99
100 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
101
102 void
103 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
104 {
105 uint32_t old = 0;
106 bool appeared = FALSE;
107 bool is_remote = pcmk_is_set(node->flags, crm_remote_node);
108
109 controld_node_pending_timer(node);
110
111
112
113
114
115 if (!is_remote) {
116 controld_set_fsa_input_flags(R_PEER_DATA);
117 }
118
119 if (type == crm_status_processes
120 && pcmk_is_set(node->processes, crm_get_cluster_proc())
121 && !AM_I_DC
122 && !is_remote) {
123
124
125
126
127 xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
128
129 crm_debug("Sending hello to node %u so that it learns our node name", node->id);
130 pcmk__cluster_send_message(node, crm_msg_crmd, query);
131
132 free_xml(query);
133 }
134
135 if (node->uname == NULL) {
136 return;
137 }
138
139 switch (type) {
140 case crm_status_uname:
141
142 crm_info("%s node %s is now %s",
143 (is_remote? "Remote" : "Cluster"),
144 node->uname, state_text(node->state));
145 return;
146
147 case crm_status_nstate:
148
149
150
151 CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
152 return);
153
154 crm_info("%s node %s is now %s (was %s)",
155 (is_remote? "Remote" : "Cluster"),
156 node->uname, state_text(node->state), state_text(data));
157
158 if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) {
159 appeared = TRUE;
160 if (!is_remote) {
161 remove_stonith_cleanup(node->uname);
162 }
163 } else {
164 controld_remove_failed_sync_node(node->uname);
165 controld_remove_voter(node->uname);
166 }
167
168 crmd_alert_node_event(node);
169 break;
170
171 case crm_status_processes:
172 CRM_CHECK(data != NULL, return);
173 old = *(const uint32_t *)data;
174 appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
175
176 {
177 const char *dc_s = controld_globals.dc_name;
178
179 if ((dc_s == NULL) && AM_I_DC) {
180 dc_s = PCMK_VALUE_TRUE;
181 }
182
183 crm_info("Node %s is %s a peer " CRM_XS
184 " DC=%s old=%#07x new=%#07x",
185 node->uname, (appeared? "now" : "no longer"),
186 pcmk__s(dc_s, "<none>"), old, node->processes);
187 }
188
189 if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
190
191
192
193 crm_trace("Process flag %#7x did not change from %#7x to %#7x",
194 crm_get_cluster_proc(), old, node->processes);
195 return;
196
197 }
198
199 if (!appeared) {
200 node->peer_lost = time(NULL);
201 controld_remove_failed_sync_node(node->uname);
202 controld_remove_voter(node->uname);
203 }
204
205 if (!pcmk_is_set(controld_globals.fsa_input_register,
206 R_CIB_CONNECTED)) {
207 crm_trace("Ignoring peer status change because not connected to CIB");
208 return;
209
210 } else if (controld_globals.fsa_state == S_STOPPING) {
211 crm_trace("Ignoring peer status change because stopping");
212 return;
213 }
214
215 if (!appeared
216 && pcmk__str_eq(node->uname, controld_globals.our_nodename,
217 pcmk__str_casei)) {
218
219 crm_notice("Our peer connection failed");
220 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
221
222 } else if (pcmk__str_eq(node->uname, controld_globals.dc_name,
223 pcmk__str_casei)
224 && !pcmk__cluster_is_node_active(node)) {
225
226 crm_notice("Our peer on the DC (%s) is dead",
227 controld_globals.dc_name);
228 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
229
230
231
232
233
234
235
236
237
238 if (compare_version(controld_globals.dc_version, "3.0.9") > 0) {
239 controld_delete_node_state(node->uname,
240 controld_section_attrs,
241 cib_scope_local);
242 }
243
244 } else if (AM_I_DC
245 || pcmk_is_set(controld_globals.flags, controld_dc_left)
246 || (controld_globals.dc_name == NULL)) {
247
248
249
250
251 if (appeared) {
252 te_trigger_stonith_history_sync(FALSE);
253 } else {
254 controld_delete_node_state(node->uname,
255 controld_section_attrs,
256 cib_scope_local);
257 }
258 }
259 break;
260 }
261
262 if (AM_I_DC) {
263 xmlNode *update = NULL;
264 int flags = node_update_peer;
265 int alive = node_alive(node);
266 pcmk__graph_action_t *down = match_down_event(node->uuid);
267
268 crm_trace("Alive=%d, appeared=%d, down=%d",
269 alive, appeared, (down? down->id : -1));
270
271 if (appeared && (alive > 0) && !is_remote) {
272 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
273 }
274
275 if (down) {
276 const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION);
277
278 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
279
280
281 crm_trace("Updating CIB %s fencer reported fencing of %s complete",
282 (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname);
283
284 } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
285 pcmk__str_casei)) {
286
287
288 if (!is_remote) {
289 flags |= node_update_join | node_update_expected;
290 crmd_peer_down(node, FALSE);
291 check_join_state(controld_globals.fsa_state, __func__);
292 }
293 if (alive >= 0) {
294 crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
295 task, node->uname, down->id);
296 } else {
297 crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
298 task, node->uname, down->id);
299 pcmk__update_graph(controld_globals.transition_graph, down);
300 trigger_graph();
301 }
302
303 } else {
304 crm_trace("Node %s is %s, was expected to %s (op %d)",
305 node->uname,
306 ((alive > 0)? "alive" :
307 ((alive < 0)? "dead" : "partially alive")),
308 task, down->id);
309 }
310
311 } else if (appeared == FALSE) {
312 if ((controld_globals.transition_graph == NULL)
313 || (controld_globals.transition_graph->id <= 0)) {
314 crm_info("Stonith/shutdown of node %s is unknown to the "
315 "current DC", node->uname);
316 } else {
317 crm_warn("Stonith/shutdown of node %s was not expected",
318 node->uname);
319 }
320 if (!is_remote) {
321 crm_update_peer_join(__func__, node, crm_join_none);
322 check_join_state(controld_globals.fsa_state, __func__);
323 }
324 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
325 "Node failure", NULL);
326 fail_incompletable_actions(controld_globals.transition_graph,
327 node->uuid);
328
329 } else {
330 crm_trace("Node %s came up, was not expected to be down",
331 node->uname);
332 }
333
334 if (is_remote) {
335
336
337
338 flags |= node_update_cluster;
339
340
341 if (appeared) {
342 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
343 "Pacemaker Remote node integrated", NULL);
344 }
345 }
346
347 if (!appeared && (type == crm_status_processes)
348 && (node->when_member > 1)) {
349
350
351
352
353
354 node->when_member = 1;
355 flags |= node_update_cluster;
356 controld_node_pending_timer(node);
357 }
358
359
360 update = create_node_state_update(node, flags, NULL, __func__);
361 if (update == NULL) {
362 crm_debug("Node state update not yet possible for %s", node->uname);
363 } else {
364 fsa_cib_anon_update(PCMK_XE_STATUS, update);
365 }
366 free_xml(update);
367 }
368
369 controld_trigger_fsa();
370 }
371
372 gboolean
373 crm_fsa_trigger(gpointer user_data)
374 {
375 crm_trace("Invoked (queue len: %d)",
376 g_list_length(controld_globals.fsa_message_queue));
377 s_crmd_fsa(C_FSA_INTERNAL);
378 crm_trace("Exited (queue len: %d)",
379 g_list_length(controld_globals.fsa_message_queue));
380 return TRUE;
381 }