This source file includes the following definitions.
- reap_dead_nodes
- post_cache_update
- crmd_node_update_complete
- create_node_state_update
- remove_conflicting_node_callback
- search_conflicting_node_callback
- populate_cib_nodes_from_cache
- node_list_update_callback
- populate_cib_nodes
- cib_quorum_update_complete
- crm_update_quorum
1
2
3
4
5
6
7
8
9
10
11 #include <crm_internal.h>
12
13 #include <stdint.h>
14 #include <string.h>
15
16 #include <crm/crm.h>
17 #include <crm/common/xml.h>
18 #include <crm/common/xml_internal.h>
19 #include <crm/cluster/internal.h>
20
21 #include <pacemaker-controld.h>
22
23 void post_cache_update(int instance);
24
25 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
26
27 static void
28 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
29 {
30 pcmk__node_status_t *node = value;
31
32 if (pcmk__cluster_is_node_active(node)) {
33 return;
34 }
35
36 crm_update_peer_join(__func__, node, controld_join_none);
37
38 if ((node != NULL) && (node->name != NULL)) {
39 if (controld_is_local_node(node->name)) {
40 crm_err("We're not part of the cluster anymore");
41 register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
42
43 } else if (!AM_I_DC
44 && pcmk__str_eq(node->name, controld_globals.dc_name,
45 pcmk__str_casei)) {
46 crm_warn("Our DC node (%s) left the cluster", node->name);
47 register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
48 }
49 }
50
51 if ((controld_globals.fsa_state == S_INTEGRATION)
52 || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
53 check_join_state(controld_globals.fsa_state, __func__);
54 }
55 if ((node != NULL) && (node->xml_id != NULL)) {
56 fail_incompletable_actions(controld_globals.transition_graph,
57 node->xml_id);
58 }
59 }
60
61 void
62 post_cache_update(int instance)
63 {
64 xmlNode *no_op = NULL;
65
66 controld_globals.peer_seq = instance;
67 crm_debug("Updated cache after membership event %d.", instance);
68
69 g_hash_table_foreach(pcmk__peer_cache, reap_dead_nodes, NULL);
70 controld_set_fsa_input_flags(R_MEMBERSHIP);
71
72 if (AM_I_DC) {
73 populate_cib_nodes(controld_node_update_quick
74 |controld_node_update_cluster
75 |controld_node_update_peer
76 |controld_node_update_expected, __func__);
77 }
78
79
80
81
82
83 controld_set_fsa_action_flags(A_ELECTION_CHECK);
84 controld_trigger_fsa();
85
86
87
88
89 no_op = pcmk__new_request(pcmk_ipc_controld,
90 (AM_I_DC? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD), NULL,
91 CRM_SYSTEM_CRMD, CRM_OP_NOOP, NULL);
92 pcmk__cluster_send_message(NULL, pcmk_ipc_controld, no_op);
93 pcmk__xml_free(no_op);
94 }
95
96 static void
97 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
98 {
99 fsa_data_t *msg_data = NULL;
100
101 if (rc == pcmk_ok) {
102 crm_trace("Node update %d complete", call_id);
103
104 } else if(call_id < pcmk_ok) {
105 crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
106 crm_log_xml_debug(msg, "failed");
107 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
108
109 } else {
110 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
111 crm_log_xml_debug(msg, "failed");
112 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
113 }
114 }
115
116
117
118
119
120
121
122
123
124
125
126
127 xmlNode *
128 create_node_state_update(pcmk__node_status_t *node, uint32_t flags,
129 xmlNode *parent, const char *source)
130 {
131
132 const char *value = NULL;
133 xmlNode *node_state;
134
135 if (!node->state) {
136 crm_info("Node update for %s cancelled: no state, not seen yet",
137 node->name);
138 return NULL;
139 }
140
141 node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE);
142
143 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
144 pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true);
145 }
146
147 if (crm_xml_add(node_state, PCMK_XA_ID,
148 pcmk__cluster_get_xml_id(node)) == NULL) {
149 crm_info("Node update for %s cancelled: no ID", node->name);
150 pcmk__xml_free(node_state);
151 return NULL;
152 }
153
154 crm_xml_add(node_state, PCMK_XA_UNAME, node->name);
155
156 if (pcmk_is_set(flags, controld_node_update_cluster)) {
157 if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
158
159 crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member);
160
161 } else {
162 pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM,
163 pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
164 pcmk__str_none));
165 }
166 }
167
168 if (!pcmk_is_set(node->flags, pcmk__node_status_remote)) {
169 if (pcmk_is_set(flags, controld_node_update_peer)) {
170 if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
171
172 crm_xml_add_ll(node_state, PCMK_XA_CRMD, node->when_online);
173
174 } else {
175
176 value = PCMK_VALUE_OFFLINE;
177 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
178 value = PCMK_VALUE_ONLINE;
179 }
180 crm_xml_add(node_state, PCMK_XA_CRMD, value);
181 }
182 }
183
184 if (pcmk_is_set(flags, controld_node_update_join)) {
185 if (controld_get_join_phase(node) <= controld_join_none) {
186 value = CRMD_JOINSTATE_DOWN;
187 } else {
188 value = CRMD_JOINSTATE_MEMBER;
189 }
190 crm_xml_add(node_state, PCMK__XA_JOIN, value);
191 }
192
193 if (pcmk_is_set(flags, controld_node_update_expected)) {
194 crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected);
195 }
196 }
197
198 crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source);
199
200 return node_state;
201 }
202
203 static void
204 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
205 xmlNode * output, void *user_data)
206 {
207 char *node_uuid = user_data;
208
209 do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
210 "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
211 node_uuid, pcmk_strerror(rc), rc);
212 }
213
214 static void
215 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
216 xmlNode * output, void *user_data)
217 {
218 char *new_node_uuid = user_data;
219 xmlNode *node_xml = NULL;
220
221 if (rc != pcmk_ok) {
222 if (rc != -ENXIO) {
223 crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
224 new_node_uuid, pcmk_strerror(rc), rc);
225 }
226 return;
227
228 } else if (output == NULL) {
229 return;
230 }
231
232 if (pcmk__xe_is(output, PCMK_XE_NODE)) {
233 node_xml = output;
234
235 } else {
236 node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL);
237 }
238
239 for (; node_xml != NULL; node_xml = pcmk__xe_next(node_xml, PCMK_XE_NODE)) {
240 const char *node_uuid = NULL;
241 const char *node_uname = NULL;
242 GHashTableIter iter;
243 pcmk__node_status_t *node = NULL;
244 gboolean known = FALSE;
245
246 node_uuid = crm_element_value(node_xml, PCMK_XA_ID);
247 node_uname = crm_element_value(node_xml, PCMK_XA_UNAME);
248
249 if (node_uuid == NULL || node_uname == NULL) {
250 continue;
251 }
252
253 g_hash_table_iter_init(&iter, pcmk__peer_cache);
254 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
255 if ((node != NULL)
256 && pcmk__str_eq(node->xml_id, node_uuid, pcmk__str_casei)
257 && pcmk__str_eq(node->name, node_uname, pcmk__str_casei)) {
258
259 known = TRUE;
260 break;
261 }
262 }
263
264 if (known == FALSE) {
265 cib_t *cib_conn = controld_globals.cib_conn;
266 int delete_call_id = 0;
267 xmlNode *node_state_xml = NULL;
268
269 crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
270 node_uuid, node_uname, new_node_uuid);
271
272 delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES,
273 node_xml, cib_none);
274 fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
275 remove_conflicting_node_callback);
276
277 node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
278 crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid);
279 crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname);
280
281 delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS,
282 node_state_xml, cib_none);
283 fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
284 remove_conflicting_node_callback);
285 pcmk__xml_free(node_state_xml);
286 }
287 }
288 }
289
290
291
292
293
294
295
296
297
298 static void
299 populate_cib_nodes_from_cache(xmlNode *nodes_xml)
300 {
301 GString *xpath = NULL;
302 GHashTableIter iter;
303 pcmk__node_status_t *node = NULL;
304
305 g_hash_table_iter_init(&iter, pcmk__peer_cache);
306 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
307 cib_t *cib_conn = controld_globals.cib_conn;
308 int call_id = 0;
309 xmlNode *new_node = NULL;
310
311 if ((node->xml_id == NULL) || (node->name == NULL)) {
312
313 continue;
314 }
315
316 crm_trace("Creating node entry for %s/%s", node->name, node->xml_id);
317
318 new_node = pcmk__xe_create(nodes_xml, PCMK_XE_NODE);
319 crm_xml_add(new_node, PCMK_XA_ID, node->xml_id);
320 crm_xml_add(new_node, PCMK_XA_UNAME, node->name);
321
322 if (xpath == NULL) {
323 xpath = g_string_sized_new(512);
324 } else {
325 g_string_truncate(xpath, 0);
326 }
327
328
329 pcmk__g_strcat(xpath,
330 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION
331 "/" PCMK_XE_NODES "/" PCMK_XE_NODE
332 "[@" PCMK_XA_UNAME "='", node->name, "']"
333 "[@" PCMK_XA_ID "!='", node->xml_id, "']", NULL);
334
335 call_id = cib_conn->cmds->query(cib_conn, xpath->str, NULL, cib_xpath);
336 fsa_register_cib_callback(call_id, pcmk__str_copy(node->xml_id),
337 search_conflicting_node_callback);
338 }
339
340 if (xpath != NULL) {
341 g_string_free(xpath, TRUE);
342 }
343 }
344
345 static void
346 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
347 {
348 fsa_data_t *msg_data = NULL;
349
350 if(call_id < pcmk_ok) {
351 crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
352 crm_log_xml_debug(msg, "update:failed");
353 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
354
355 } else if(rc < pcmk_ok) {
356 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
357 crm_log_xml_debug(msg, "update:failed");
358 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
359 }
360 }
361
362 void
363 populate_cib_nodes(uint32_t flags, const char *source)
364 {
365 bool from_cache = true;
366 xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES);
367
368 GHashTableIter iter;
369 pcmk__node_status_t *node = NULL;
370
371 #if SUPPORT_COROSYNC
372 if (!pcmk_is_set(flags, controld_node_update_quick)
373 && (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {
374
375 from_cache = !pcmk__corosync_add_nodes(node_list);
376 }
377 #endif
378
379 crm_trace("Populating <" PCMK_XE_NODES "> section of CIB from %s",
380 (from_cache? "peer cache" : "cluster"));
381
382 if (from_cache) {
383 populate_cib_nodes_from_cache(node_list);
384 }
385
386 if (controld_update_cib(PCMK_XE_NODES, node_list, cib_none,
387 node_list_update_callback) != pcmk_rc_ok) {
388
389 goto done;
390 }
391 if (pcmk__peer_cache == NULL) {
392
393 goto done;
394 }
395 if (!AM_I_DC) {
396
397 goto done;
398 }
399
400 pcmk__xml_free(node_list);
401 node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS);
402
403 g_hash_table_iter_init(&iter, pcmk__peer_cache);
404 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
405 create_node_state_update(node, flags, node_list, source);
406 }
407
408 if (pcmk__remote_peer_cache != NULL) {
409 g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
410 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
411 create_node_state_update(node, flags, node_list, source);
412 }
413 }
414
415 controld_update_cib(PCMK_XE_STATUS, node_list, cib_none,
416 crmd_node_update_complete);
417
418 done:
419 pcmk__xml_free(node_list);
420 }
421
422 static void
423 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
424 {
425 fsa_data_t *msg_data = NULL;
426
427 if (rc == pcmk_ok) {
428 crm_trace("Quorum update %d complete", call_id);
429
430 } else {
431 crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
432 crm_log_xml_debug(msg, "failed");
433 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
434 }
435 }
436
437 void
438 crm_update_quorum(gboolean quorum, gboolean force_update)
439 {
440 bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);
441
442 if (quorum) {
443 controld_set_global_flags(controld_ever_had_quorum);
444
445 } else if (pcmk_all_flags_set(controld_globals.flags,
446 controld_ever_had_quorum
447 |controld_no_quorum_panic)) {
448 pcmk__panic("Quorum lost");
449 }
450
451 if (AM_I_DC
452 && ((has_quorum && !quorum) || (!has_quorum && quorum)
453 || force_update)) {
454 xmlNode *update = NULL;
455
456 update = pcmk__xe_create(NULL, PCMK_XE_CIB);
457 crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum);
458 crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid);
459
460 crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
461 controld_update_cib(PCMK_XE_CIB, update, cib_none,
462 cib_quorum_update_complete);
463 pcmk__xml_free(update);
464
465
466
467
468
469
470
471
472
473
474
475 if (quorum) {
476
477
478
479
480 abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart,
481 "Quorum gained", 5000);
482 } else {
483 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
484 "Quorum lost", NULL);
485 }
486 }
487
488 if (quorum) {
489 controld_set_global_flags(controld_has_quorum);
490 } else {
491 controld_clear_global_flags(controld_has_quorum);
492 }
493 }