This source file includes the following definitions.
- reap_dead_nodes
- post_cache_update
- crmd_node_update_complete
- create_node_state_update
- remove_conflicting_node_callback
- search_conflicting_node_callback
- node_list_update_callback
- populate_cib_nodes
- cib_quorum_update_complete
- crm_update_quorum
1
2
3
4
5
6
7
8
9
10
11 #include <crm_internal.h>
12
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/common/xml.h>
17 #include <crm/common/xml_internal.h>
18 #include <crm/cluster/internal.h>
19
20 #include <pacemaker-controld.h>
21
22 void post_cache_update(int instance);
23
24 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
25
26 static void
27 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
28 {
29 pcmk__node_status_t *node = value;
30
31 if (pcmk__cluster_is_node_active(node)) {
32 return;
33 }
34
35 crm_update_peer_join(__func__, node, controld_join_none);
36
37 if ((node != NULL) && (node->name != NULL)) {
38 if (controld_is_local_node(node->name)) {
39 crm_err("We're not part of the cluster anymore");
40 register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
41
42 } else if (!AM_I_DC
43 && pcmk__str_eq(node->name, controld_globals.dc_name,
44 pcmk__str_casei)) {
45 crm_warn("Our DC node (%s) left the cluster", node->name);
46 register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
47 }
48 }
49
50 if ((controld_globals.fsa_state == S_INTEGRATION)
51 || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
52 check_join_state(controld_globals.fsa_state, __func__);
53 }
54 if ((node != NULL) && (node->xml_id != NULL)) {
55 fail_incompletable_actions(controld_globals.transition_graph,
56 node->xml_id);
57 }
58 }
59
60 void
61 post_cache_update(int instance)
62 {
63 xmlNode *no_op = NULL;
64
65 controld_globals.peer_seq = instance;
66 crm_debug("Updated cache after membership event %d.", instance);
67
68 g_hash_table_foreach(pcmk__peer_cache, reap_dead_nodes, NULL);
69 controld_set_fsa_input_flags(R_MEMBERSHIP);
70
71 if (AM_I_DC) {
72 populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
73 node_update_expected, __func__);
74 }
75
76
77
78
79
80 controld_set_fsa_action_flags(A_ELECTION_CHECK);
81 controld_trigger_fsa();
82
83
84
85
86 no_op = pcmk__new_request(pcmk_ipc_controld,
87 (AM_I_DC? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD), NULL,
88 CRM_SYSTEM_CRMD, CRM_OP_NOOP, NULL);
89 pcmk__cluster_send_message(NULL, pcmk_ipc_controld, no_op);
90 pcmk__xml_free(no_op);
91 }
92
93 static void
94 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
95 {
96 fsa_data_t *msg_data = NULL;
97
98 if (rc == pcmk_ok) {
99 crm_trace("Node update %d complete", call_id);
100
101 } else if(call_id < pcmk_ok) {
102 crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
103 crm_log_xml_debug(msg, "failed");
104 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
105
106 } else {
107 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
108 crm_log_xml_debug(msg, "failed");
109 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
110 }
111 }
112
113
114
115
116
117
118
119
120
121
122
123
124 xmlNode *
125 create_node_state_update(pcmk__node_status_t *node, int flags,
126 xmlNode *parent, const char *source)
127 {
128 const char *value = NULL;
129 xmlNode *node_state;
130
131 if (!node->state) {
132 crm_info("Node update for %s cancelled: no state, not seen yet",
133 node->name);
134 return NULL;
135 }
136
137 node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE);
138
139 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
140 pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true);
141 }
142
143 if (crm_xml_add(node_state, PCMK_XA_ID,
144 pcmk__cluster_node_uuid(node)) == NULL) {
145 crm_info("Node update for %s cancelled: no ID", node->name);
146 pcmk__xml_free(node_state);
147 return NULL;
148 }
149
150 crm_xml_add(node_state, PCMK_XA_UNAME, node->name);
151
152 if ((flags & node_update_cluster) && node->state) {
153 if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
154
155 crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member);
156
157 } else {
158 pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM,
159 pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
160 pcmk__str_none));
161 }
162 }
163
164 if (!pcmk_is_set(node->flags, pcmk__node_status_remote)) {
165 if (flags & node_update_peer) {
166 if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
167
168 crm_xml_add_ll(node_state, PCMK_XA_CRMD, node->when_online);
169
170 } else {
171
172 value = PCMK_VALUE_OFFLINE;
173 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
174 value = PCMK_VALUE_ONLINE;
175 }
176 crm_xml_add(node_state, PCMK_XA_CRMD, value);
177 }
178 }
179
180 if (flags & node_update_join) {
181 if (controld_get_join_phase(node) <= controld_join_none) {
182 value = CRMD_JOINSTATE_DOWN;
183 } else {
184 value = CRMD_JOINSTATE_MEMBER;
185 }
186 crm_xml_add(node_state, PCMK__XA_JOIN, value);
187 }
188
189 if (flags & node_update_expected) {
190 crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected);
191 }
192 }
193
194 crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source);
195
196 return node_state;
197 }
198
199 static void
200 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
201 xmlNode * output, void *user_data)
202 {
203 char *node_uuid = user_data;
204
205 do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
206 "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
207 node_uuid, pcmk_strerror(rc), rc);
208 }
209
210 static void
211 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
212 xmlNode * output, void *user_data)
213 {
214 char *new_node_uuid = user_data;
215 xmlNode *node_xml = NULL;
216
217 if (rc != pcmk_ok) {
218 if (rc != -ENXIO) {
219 crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
220 new_node_uuid, pcmk_strerror(rc), rc);
221 }
222 return;
223
224 } else if (output == NULL) {
225 return;
226 }
227
228 if (pcmk__xe_is(output, PCMK_XE_NODE)) {
229 node_xml = output;
230
231 } else {
232 node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL);
233 }
234
235 for (; node_xml != NULL; node_xml = pcmk__xe_next(node_xml, PCMK_XE_NODE)) {
236 const char *node_uuid = NULL;
237 const char *node_uname = NULL;
238 GHashTableIter iter;
239 pcmk__node_status_t *node = NULL;
240 gboolean known = FALSE;
241
242 node_uuid = crm_element_value(node_xml, PCMK_XA_ID);
243 node_uname = crm_element_value(node_xml, PCMK_XA_UNAME);
244
245 if (node_uuid == NULL || node_uname == NULL) {
246 continue;
247 }
248
249 g_hash_table_iter_init(&iter, pcmk__peer_cache);
250 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
251 if ((node != NULL)
252 && pcmk__str_eq(node->xml_id, node_uuid, pcmk__str_casei)
253 && pcmk__str_eq(node->name, node_uname, pcmk__str_casei)) {
254
255 known = TRUE;
256 break;
257 }
258 }
259
260 if (known == FALSE) {
261 cib_t *cib_conn = controld_globals.cib_conn;
262 int delete_call_id = 0;
263 xmlNode *node_state_xml = NULL;
264
265 crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
266 node_uuid, node_uname, new_node_uuid);
267
268 delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES,
269 node_xml, cib_none);
270 fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
271 remove_conflicting_node_callback);
272
273 node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
274 crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid);
275 crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname);
276
277 delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS,
278 node_state_xml, cib_none);
279 fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
280 remove_conflicting_node_callback);
281 pcmk__xml_free(node_state_xml);
282 }
283 }
284 }
285
286 static void
287 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
288 {
289 fsa_data_t *msg_data = NULL;
290
291 if(call_id < pcmk_ok) {
292 crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
293 crm_log_xml_debug(msg, "update:failed");
294 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
295
296 } else if(rc < pcmk_ok) {
297 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
298 crm_log_xml_debug(msg, "update:failed");
299 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
300 }
301 }
302
303 void
304 populate_cib_nodes(enum node_update_flags flags, const char *source)
305 {
306 cib_t *cib_conn = controld_globals.cib_conn;
307
308 int call_id = 0;
309 gboolean from_hashtable = TRUE;
310 xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES);
311
312 #if SUPPORT_COROSYNC
313 if (!pcmk_is_set(flags, node_update_quick)
314 && (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {
315
316 from_hashtable = pcmk__corosync_add_nodes(node_list);
317 }
318 #endif
319
320 if (from_hashtable) {
321 GHashTableIter iter;
322 pcmk__node_status_t *node = NULL;
323 GString *xpath = NULL;
324
325 g_hash_table_iter_init(&iter, pcmk__peer_cache);
326 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
327 xmlNode *new_node = NULL;
328
329 if ((node->xml_id != NULL) && (node->name != NULL)) {
330 crm_trace("Creating node entry for %s/%s",
331 node->name, node->xml_id);
332 if (xpath == NULL) {
333 xpath = g_string_sized_new(512);
334 } else {
335 g_string_truncate(xpath, 0);
336 }
337
338
339 new_node = pcmk__xe_create(node_list, PCMK_XE_NODE);
340 crm_xml_add(new_node, PCMK_XA_ID, node->xml_id);
341 crm_xml_add(new_node, PCMK_XA_UNAME, node->name);
342
343
344 pcmk__g_strcat(xpath,
345 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION
346 "/" PCMK_XE_NODES "/" PCMK_XE_NODE
347 "[@" PCMK_XA_UNAME "='", node->name, "']"
348 "[@" PCMK_XA_ID "!='", node->xml_id, "']", NULL);
349
350 call_id = cib_conn->cmds->query(cib_conn,
351 (const char *) xpath->str, NULL,
352 cib_xpath);
353 fsa_register_cib_callback(call_id, pcmk__str_copy(node->xml_id),
354 search_conflicting_node_callback);
355 }
356 }
357
358 if (xpath != NULL) {
359 g_string_free(xpath, TRUE);
360 }
361 }
362
363 crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
364
365 if ((controld_update_cib(PCMK_XE_NODES, node_list, cib_none,
366 node_list_update_callback) == pcmk_rc_ok)
367 && (pcmk__peer_cache != NULL) && AM_I_DC) {
368
369
370
371
372 GHashTableIter iter;
373 pcmk__node_status_t *node = NULL;
374
375 pcmk__xml_free(node_list);
376 node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS);
377
378 g_hash_table_iter_init(&iter, pcmk__peer_cache);
379 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
380 create_node_state_update(node, flags, node_list, source);
381 }
382
383 if (pcmk__remote_peer_cache != NULL) {
384 g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
385 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
386 create_node_state_update(node, flags, node_list, source);
387 }
388 }
389
390 controld_update_cib(PCMK_XE_STATUS, node_list, cib_none,
391 crmd_node_update_complete);
392 }
393 pcmk__xml_free(node_list);
394 }
395
396 static void
397 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
398 {
399 fsa_data_t *msg_data = NULL;
400
401 if (rc == pcmk_ok) {
402 crm_trace("Quorum update %d complete", call_id);
403
404 } else {
405 crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
406 crm_log_xml_debug(msg, "failed");
407 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
408 }
409 }
410
411 void
412 crm_update_quorum(gboolean quorum, gboolean force_update)
413 {
414 bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);
415
416 if (quorum) {
417 controld_set_global_flags(controld_ever_had_quorum);
418
419 } else if (pcmk_all_flags_set(controld_globals.flags,
420 controld_ever_had_quorum
421 |controld_no_quorum_panic)) {
422 pcmk__panic("Quorum lost");
423 }
424
425 if (AM_I_DC
426 && ((has_quorum && !quorum) || (!has_quorum && quorum)
427 || force_update)) {
428 xmlNode *update = NULL;
429
430 update = pcmk__xe_create(NULL, PCMK_XE_CIB);
431 crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum);
432 crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid);
433
434 crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
435 controld_update_cib(PCMK_XE_CIB, update, cib_none,
436 cib_quorum_update_complete);
437 pcmk__xml_free(update);
438
439
440
441
442
443
444
445
446
447
448
449 if (quorum) {
450
451
452
453
454 abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart,
455 "Quorum gained", 5000);
456 } else {
457 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
458 "Quorum lost", NULL);
459 }
460 }
461
462 if (quorum) {
463 controld_set_global_flags(controld_has_quorum);
464 } else {
465 controld_clear_global_flags(controld_has_quorum);
466 }
467 }