This source file includes the following definitions.
- reap_dead_nodes
- post_cache_update
- crmd_node_update_complete
- create_node_state_update
- remove_conflicting_node_callback
- search_conflicting_node_callback
- node_list_update_callback
- populate_cib_nodes
- cib_quorum_update_complete
- crm_update_quorum
1
2
3
4
5
6
7
8
9
10
11 #include <crm_internal.h>
12
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/common/xml.h>
17 #include <crm/common/xml_internal.h>
18 #include <crm/cluster/internal.h>
19
20 #include <pacemaker-controld.h>
21
22 void post_cache_update(int instance);
23
24 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
25
26 static void
27 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
28 {
29 crm_node_t *node = value;
30
31 if (!pcmk__cluster_is_node_active(node)) {
32 crm_update_peer_join(__func__, node, crm_join_none);
33
34 if(node && node->uname) {
35 if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
36 pcmk__str_casei)) {
37 crm_err("We're not part of the cluster anymore");
38 register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
39
40 } else if (!AM_I_DC
41 && pcmk__str_eq(node->uname, controld_globals.dc_name,
42 pcmk__str_casei)) {
43 crm_warn("Our DC node (%s) left the cluster", node->uname);
44 register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
45 }
46 }
47
48 if ((controld_globals.fsa_state == S_INTEGRATION)
49 || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
50 check_join_state(controld_globals.fsa_state, __func__);
51 }
52 if ((node != NULL) && (node->uuid != NULL)) {
53 fail_incompletable_actions(controld_globals.transition_graph,
54 node->uuid);
55 }
56 }
57 }
58
59 void
60 post_cache_update(int instance)
61 {
62 xmlNode *no_op = NULL;
63
64 crm_peer_seq = instance;
65 crm_debug("Updated cache after membership event %d.", instance);
66
67 g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
68 controld_set_fsa_input_flags(R_MEMBERSHIP);
69
70 if (AM_I_DC) {
71 populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
72 node_update_expected, __func__);
73 }
74
75
76
77
78
79 controld_set_fsa_action_flags(A_ELECTION_CHECK);
80 controld_trigger_fsa();
81
82
83
84
85 no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
86 AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
87 pcmk__cluster_send_message(NULL, crm_msg_crmd, no_op);
88 free_xml(no_op);
89 }
90
91 static void
92 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
93 {
94 fsa_data_t *msg_data = NULL;
95
96 if (rc == pcmk_ok) {
97 crm_trace("Node update %d complete", call_id);
98
99 } else if(call_id < pcmk_ok) {
100 crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
101 crm_log_xml_debug(msg, "failed");
102 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
103
104 } else {
105 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
106 crm_log_xml_debug(msg, "failed");
107 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
108 }
109 }
110
111
112
113
114
115
116
117
118
119
120
121
122 xmlNode *
123 create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
124 const char *source)
125 {
126 const char *value = NULL;
127 xmlNode *node_state;
128
129 if (!node->state) {
130 crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
131 return NULL;
132 }
133
134 node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE);
135
136 if (pcmk_is_set(node->flags, crm_remote_node)) {
137 pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true);
138 }
139
140 if (crm_xml_add(node_state, PCMK_XA_ID,
141 pcmk__cluster_node_uuid(node)) == NULL) {
142 crm_info("Node update for %s cancelled: no ID", node->uname);
143 free_xml(node_state);
144 return NULL;
145 }
146
147 crm_xml_add(node_state, PCMK_XA_UNAME, node->uname);
148
149 if ((flags & node_update_cluster) && node->state) {
150 if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
151
152 crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member);
153
154 } else {
155 pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM,
156 pcmk__str_eq(node->state, CRM_NODE_MEMBER,
157 pcmk__str_casei));
158 }
159 }
160
161 if (!pcmk_is_set(node->flags, crm_remote_node)) {
162 if (flags & node_update_peer) {
163 if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
164
165 crm_xml_add_ll(node_state, PCMK_XA_CRMD, node->when_online);
166
167 } else {
168
169 value = PCMK_VALUE_OFFLINE;
170 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
171 value = PCMK_VALUE_ONLINE;
172 }
173 crm_xml_add(node_state, PCMK_XA_CRMD, value);
174 }
175 }
176
177 if (flags & node_update_join) {
178 if (node->join <= crm_join_none) {
179 value = CRMD_JOINSTATE_DOWN;
180 } else {
181 value = CRMD_JOINSTATE_MEMBER;
182 }
183 crm_xml_add(node_state, PCMK__XA_JOIN, value);
184 }
185
186 if (flags & node_update_expected) {
187 crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected);
188 }
189 }
190
191 crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source);
192
193 return node_state;
194 }
195
196 static void
197 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
198 xmlNode * output, void *user_data)
199 {
200 char *node_uuid = user_data;
201
202 do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
203 "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
204 node_uuid, pcmk_strerror(rc), rc);
205 }
206
207 static void
208 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
209 xmlNode * output, void *user_data)
210 {
211 char *new_node_uuid = user_data;
212 xmlNode *node_xml = NULL;
213
214 if (rc != pcmk_ok) {
215 if (rc != -ENXIO) {
216 crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
217 new_node_uuid, pcmk_strerror(rc), rc);
218 }
219 return;
220
221 } else if (output == NULL) {
222 return;
223 }
224
225 if (pcmk__xe_is(output, PCMK_XE_NODE)) {
226 node_xml = output;
227
228 } else {
229 node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL);
230 }
231
232 for (; node_xml != NULL; node_xml = pcmk__xe_next_same(node_xml)) {
233 const char *node_uuid = NULL;
234 const char *node_uname = NULL;
235 GHashTableIter iter;
236 crm_node_t *node = NULL;
237 gboolean known = FALSE;
238
239 node_uuid = crm_element_value(node_xml, PCMK_XA_ID);
240 node_uname = crm_element_value(node_xml, PCMK_XA_UNAME);
241
242 if (node_uuid == NULL || node_uname == NULL) {
243 continue;
244 }
245
246 g_hash_table_iter_init(&iter, crm_peer_cache);
247 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
248 if (node->uuid
249 && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei)
250 && node->uname
251 && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) {
252
253 known = TRUE;
254 break;
255 }
256 }
257
258 if (known == FALSE) {
259 cib_t *cib_conn = controld_globals.cib_conn;
260 int delete_call_id = 0;
261 xmlNode *node_state_xml = NULL;
262
263 crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
264 node_uuid, node_uname, new_node_uuid);
265
266 delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES,
267 node_xml, cib_scope_local);
268 fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
269 remove_conflicting_node_callback);
270
271 node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
272 crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid);
273 crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname);
274
275 delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS,
276 node_state_xml,
277 cib_scope_local);
278 fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
279 remove_conflicting_node_callback);
280 free_xml(node_state_xml);
281 }
282 }
283 }
284
285 static void
286 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
287 {
288 fsa_data_t *msg_data = NULL;
289
290 if(call_id < pcmk_ok) {
291 crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
292 crm_log_xml_debug(msg, "update:failed");
293 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
294
295 } else if(rc < pcmk_ok) {
296 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
297 crm_log_xml_debug(msg, "update:failed");
298 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
299 }
300 }
301
302 void
303 populate_cib_nodes(enum node_update_flags flags, const char *source)
304 {
305 cib_t *cib_conn = controld_globals.cib_conn;
306
307 int call_id = 0;
308 gboolean from_hashtable = TRUE;
309 xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES);
310
311 #if SUPPORT_COROSYNC
312 if (!pcmk_is_set(flags, node_update_quick)
313 && (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {
314
315 from_hashtable = pcmk__corosync_add_nodes(node_list);
316 }
317 #endif
318
319 if (from_hashtable) {
320 GHashTableIter iter;
321 crm_node_t *node = NULL;
322 GString *xpath = NULL;
323
324 g_hash_table_iter_init(&iter, crm_peer_cache);
325 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
326 xmlNode *new_node = NULL;
327
328 if ((node->uuid != NULL) && (node->uname != NULL)) {
329 crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
330 if (xpath == NULL) {
331 xpath = g_string_sized_new(512);
332 } else {
333 g_string_truncate(xpath, 0);
334 }
335
336
337 new_node = pcmk__xe_create(node_list, PCMK_XE_NODE);
338 crm_xml_add(new_node, PCMK_XA_ID, node->uuid);
339 crm_xml_add(new_node, PCMK_XA_UNAME, node->uname);
340
341
342 pcmk__g_strcat(xpath,
343 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION
344 "/" PCMK_XE_NODES "/" PCMK_XE_NODE
345 "[@" PCMK_XA_UNAME "='", node->uname, "']"
346 "[@" PCMK_XA_ID "!='", node->uuid, "']", NULL);
347
348 call_id = cib_conn->cmds->query(cib_conn,
349 (const char *) xpath->str,
350 NULL,
351 cib_scope_local|cib_xpath);
352 fsa_register_cib_callback(call_id, pcmk__str_copy(node->uuid),
353 search_conflicting_node_callback);
354 }
355 }
356
357 if (xpath != NULL) {
358 g_string_free(xpath, TRUE);
359 }
360 }
361
362 crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
363
364 if ((controld_update_cib(PCMK_XE_NODES, node_list, cib_scope_local,
365 node_list_update_callback) == pcmk_rc_ok)
366 && (crm_peer_cache != NULL) && AM_I_DC) {
367
368
369
370
371 GHashTableIter iter;
372 crm_node_t *node = NULL;
373
374 free_xml(node_list);
375 node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS);
376
377 g_hash_table_iter_init(&iter, crm_peer_cache);
378 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
379 create_node_state_update(node, flags, node_list, source);
380 }
381
382 if (crm_remote_peer_cache) {
383 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
384 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
385 create_node_state_update(node, flags, node_list, source);
386 }
387 }
388
389 controld_update_cib(PCMK_XE_STATUS, node_list, cib_scope_local,
390 crmd_node_update_complete);
391 }
392 free_xml(node_list);
393 }
394
395 static void
396 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
397 {
398 fsa_data_t *msg_data = NULL;
399
400 if (rc == pcmk_ok) {
401 crm_trace("Quorum update %d complete", call_id);
402
403 } else {
404 crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
405 crm_log_xml_debug(msg, "failed");
406 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
407 }
408 }
409
410 void
411 crm_update_quorum(gboolean quorum, gboolean force_update)
412 {
413 bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);
414
415 if (quorum) {
416 controld_set_global_flags(controld_ever_had_quorum);
417
418 } else if (pcmk_all_flags_set(controld_globals.flags,
419 controld_ever_had_quorum
420 |controld_no_quorum_suicide)) {
421 pcmk__panic(__func__);
422 }
423
424 if (AM_I_DC
425 && ((has_quorum && !quorum) || (!has_quorum && quorum)
426 || force_update)) {
427 xmlNode *update = NULL;
428
429 update = pcmk__xe_create(NULL, PCMK_XE_CIB);
430 crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum);
431 crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid);
432
433 crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
434 controld_update_cib(PCMK_XE_CIB, update, cib_scope_local,
435 cib_quorum_update_complete);
436 free_xml(update);
437
438
439
440
441
442
443
444
445
446
447
448 if (quorum) {
449
450
451
452
453 abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart,
454 "Quorum gained", 5000);
455 } else {
456 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
457 "Quorum lost", NULL);
458 }
459 }
460
461 if (quorum) {
462 controld_set_global_flags(controld_has_quorum);
463 } else {
464 controld_clear_global_flags(controld_has_quorum);
465 }
466 }