This source file includes the following definitions.
- controld_destroy_failed_sync_table
- controld_remove_failed_sync_node
- record_failed_sync_node
- lookup_failed_sync_node
- crm_update_peer_join
- start_join_round
- create_dc_message
- join_make_offer
- do_dc_join_offer_all
- do_dc_join_offer_one
- compare_int_fields
- do_dc_join_filter_offer
- do_dc_join_finalize
- free_max_generation
- finalize_sync_callback
- join_node_state_commit_callback
- do_dc_join_ack
- finalize_join_for
- check_join_state
- do_dc_join_final
- crmd_join_phase_count
- crmd_join_phase_log
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <crm/crm.h>
13
14 #include <crm/common/xml.h>
15 #include <crm/cluster.h>
16
17 #include <pacemaker-controld.h>
18
19 static char *max_generation_from = NULL;
20 static xmlNodePtr max_generation_xml = NULL;
21
22
23
24
25
26
27
28
29
30
31
32 static GHashTable *failed_sync_nodes = NULL;
33
34 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
35 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
36 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
37
38
39
40
41 static int current_join_id = 0;
42
43
44
45
46
47 void
48 controld_destroy_failed_sync_table(void)
49 {
50 if (failed_sync_nodes != NULL) {
51 g_hash_table_destroy(failed_sync_nodes);
52 failed_sync_nodes = NULL;
53 }
54 }
55
56
57
58
59
60
61
62 void
63 controld_remove_failed_sync_node(const char *node_name)
64 {
65 if (failed_sync_nodes != NULL) {
66 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
67 }
68 }
69
70
71
72
73
74
75
76
77 static void
78 record_failed_sync_node(const char *node_name, gint join_id)
79 {
80 if (failed_sync_nodes == NULL) {
81 failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
82 }
83
84
85
86
87 CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
88 GINT_TO_POINTER(join_id)));
89 }
90
91
92
93
94
95
96
97
98
99
100
101
102 static int
103 lookup_failed_sync_node(const char *node_name, gint *join_id)
104 {
105 *join_id = -1;
106
107 if (failed_sync_nodes != NULL) {
108 gpointer result = g_hash_table_lookup(failed_sync_nodes,
109 (gchar *) node_name);
110 if (result != NULL) {
111 *join_id = GPOINTER_TO_INT(result);
112 return pcmk_rc_ok;
113 }
114 }
115 return pcmk_rc_node_unknown;
116 }
117
118 void
119 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
120 {
121 enum crm_join_phase last = 0;
122
123 CRM_CHECK(node != NULL, return);
124
125
126 if (pcmk_is_set(node->flags, crm_remote_node)) {
127 return;
128 }
129
130 last = node->join;
131
132 if(phase == last) {
133 crm_trace("Node %s join-%d phase is still %s "
134 CRM_XS " nodeid=%u source=%s",
135 node->uname, current_join_id, crm_join_phase_str(last),
136 node->id, source);
137
138 } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
139 node->join = phase;
140 crm_trace("Node %s join-%d phase is now %s (was %s) "
141 CRM_XS " nodeid=%u source=%s",
142 node->uname, current_join_id, crm_join_phase_str(phase),
143 crm_join_phase_str(last), node->id, source);
144
145 } else {
146 crm_warn("Rejecting join-%d phase update for node %s because "
147 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
148 current_join_id, node->uname, crm_join_phase_str(last),
149 crm_join_phase_str(phase), node->id, source);
150 }
151 }
152
153 static void
154 start_join_round(void)
155 {
156 GHashTableIter iter;
157 crm_node_t *peer = NULL;
158
159 crm_debug("Starting new join round join-%d", current_join_id);
160
161 g_hash_table_iter_init(&iter, crm_peer_cache);
162 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
163 crm_update_peer_join(__func__, peer, crm_join_none);
164 }
165 if (max_generation_from != NULL) {
166 free(max_generation_from);
167 max_generation_from = NULL;
168 }
169 if (max_generation_xml != NULL) {
170 free_xml(max_generation_xml);
171 max_generation_xml = NULL;
172 }
173 controld_clear_fsa_input_flags(R_HAVE_CIB);
174 }
175
176
177
178
179
180
181
182
183 static xmlNode *
184 create_dc_message(const char *join_op, const char *host_to)
185 {
186 xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
187 CRM_SYSTEM_DC, NULL);
188
189
190 crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
191
192
193
194
195 pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
196 pcmk_is_set(controld_globals.fsa_input_register,
197 R_SHUTDOWN));
198 return msg;
199 }
200
201 static void
202 join_make_offer(gpointer key, gpointer value, gpointer user_data)
203 {
204 xmlNode *offer = NULL;
205 crm_node_t *member = (crm_node_t *)value;
206
207 pcmk__assert(member != NULL);
208 if (!pcmk__cluster_is_node_active(member)) {
209 crm_info("Not making join-%d offer to inactive node %s",
210 current_join_id,
211 (member->uname? member->uname : "with unknown name"));
212 if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
213
214
215
216
217
218
219
220
221
222 pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
223 }
224 return;
225 }
226
227 if (member->uname == NULL) {
228 crm_info("Not making join-%d offer to node uuid %s with unknown name",
229 current_join_id, member->uuid);
230 return;
231 }
232
233 if (controld_globals.membership_id != crm_peer_seq) {
234 controld_globals.membership_id = crm_peer_seq;
235 crm_info("Making join-%d offers based on membership event %llu",
236 current_join_id, crm_peer_seq);
237 }
238
239 if(user_data && member->join > crm_join_none) {
240 crm_info("Not making join-%d offer to already known node %s (%s)",
241 current_join_id, member->uname,
242 crm_join_phase_str(member->join));
243 return;
244 }
245
246 crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
247
248 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
249
250
251 crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
252
253 crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
254 pcmk__cluster_send_message(member, crm_msg_crmd, offer);
255 free_xml(offer);
256
257 crm_update_peer_join(__func__, member, crm_join_welcomed);
258 }
259
260
261 void
262 do_dc_join_offer_all(long long action,
263 enum crmd_fsa_cause cause,
264 enum crmd_fsa_state cur_state,
265 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
266 {
267 int count;
268
269
270
271
272
273 current_join_id++;
274 start_join_round();
275
276 update_dc(NULL);
277 if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
278 crm_info("A new node joined the cluster");
279 }
280 g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
281
282 count = crmd_join_phase_count(crm_join_welcomed);
283 crm_info("Waiting on join-%d requests from %d outstanding node%s",
284 current_join_id, count, pcmk__plural_s(count));
285
286
287 }
288
289
290 void
291 do_dc_join_offer_one(long long action,
292 enum crmd_fsa_cause cause,
293 enum crmd_fsa_state cur_state,
294 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
295 {
296 crm_node_t *member;
297 ha_msg_input_t *welcome = NULL;
298 int count;
299 const char *join_to = NULL;
300
301 if (msg_data->data == NULL) {
302 crm_info("Making join-%d offers to any unconfirmed nodes "
303 "because an unknown node joined", current_join_id);
304 g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
305 check_join_state(cur_state, __func__);
306 return;
307 }
308
309 welcome = fsa_typed_data(fsa_dt_ha_msg);
310 if (welcome == NULL) {
311
312 return;
313 }
314
315 join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
316 if (join_to == NULL) {
317 crm_err("Can't make join-%d offer to unknown node", current_join_id);
318 return;
319 }
320 member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
321
322
323
324
325
326
327 crm_update_peer_join(__func__, member, crm_join_none);
328 join_make_offer(NULL, member, NULL);
329
330
331
332
333 if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
334 member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
335 pcmk__node_search_cluster_member);
336 join_make_offer(NULL, member, NULL);
337 }
338
339
340
341
342 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
343 NULL);
344
345 count = crmd_join_phase_count(crm_join_welcomed);
346 crm_info("Waiting on join-%d requests from %d outstanding node%s",
347 current_join_id, count, pcmk__plural_s(count));
348
349
350 }
351
352 static int
353 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
354 {
355 const char *elem_l = crm_element_value(left, field);
356 const char *elem_r = crm_element_value(right, field);
357
358 long long int_elem_l;
359 long long int_elem_r;
360
361 int rc = pcmk_rc_ok;
362
363 rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
364 if (rc != pcmk_rc_ok) {
365 crm_warn("Comparing current CIB %s as -1 "
366 "because '%s' is not an integer", field, elem_l);
367 }
368
369 rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
370 if (rc != pcmk_rc_ok) {
371 crm_warn("Comparing joining node's CIB %s as -1 "
372 "because '%s' is not an integer", field, elem_r);
373 }
374
375 if (int_elem_l < int_elem_r) {
376 return -1;
377
378 } else if (int_elem_l > int_elem_r) {
379 return 1;
380 }
381
382 return 0;
383 }
384
385
386 void
387 do_dc_join_filter_offer(long long action,
388 enum crmd_fsa_cause cause,
389 enum crmd_fsa_state cur_state,
390 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
391 {
392 xmlNode *generation = NULL;
393
394 int cmp = 0;
395 int join_id = -1;
396 int count = 0;
397 gint value = 0;
398 gboolean ack_nack_bool = TRUE;
399 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
400
401 const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
402 const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
403 const char *join_version = crm_element_value(join_ack->msg,
404 PCMK_XA_CRM_FEATURE_SET);
405 crm_node_t *join_node = NULL;
406
407 if (join_from == NULL) {
408 crm_err("Ignoring invalid join request without node name");
409 return;
410 }
411 join_node = pcmk__get_node(0, join_from, NULL,
412 pcmk__node_search_cluster_member);
413
414 crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
415 if (join_id != current_join_id) {
416 crm_debug("Ignoring join-%d request from %s because we are on join-%d",
417 join_id, join_from, current_join_id);
418 check_join_state(cur_state, __func__);
419 return;
420 }
421
422 generation = join_ack->xml;
423 if (max_generation_xml != NULL && generation != NULL) {
424 int lpc = 0;
425
426 const char *attributes[] = {
427 PCMK_XA_ADMIN_EPOCH,
428 PCMK_XA_EPOCH,
429 PCMK_XA_NUM_UPDATES,
430 };
431
432
433
434
435 if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
436 for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
437 cmp = compare_int_fields(max_generation_xml, generation,
438 attributes[lpc]);
439 }
440
441 } else {
442 CRM_LOG_ASSERT(false);
443 }
444 }
445
446 if (ref == NULL) {
447 ref = "none";
448 }
449
450 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
451 crm_err("Rejecting join-%d request from node %s because we failed to "
452 "sync its CIB in join-%d " CRM_XS " ref=%s",
453 join_id, join_from, value, ref);
454 ack_nack_bool = FALSE;
455
456 } else if (!pcmk__cluster_is_node_active(join_node)) {
457 if (match_down_event(join_from) != NULL) {
458
459
460
461
462
463
464 crm_debug("Rejecting join-%d request from inactive node %s "
465 CRM_XS " ref=%s", join_id, join_from, ref);
466 } else {
467 crm_err("Rejecting join-%d request from inactive node %s "
468 CRM_XS " ref=%s", join_id, join_from, ref);
469 }
470 ack_nack_bool = FALSE;
471
472 } else if (generation == NULL) {
473 crm_err("Rejecting invalid join-%d request from node %s "
474 "missing CIB generation " CRM_XS " ref=%s",
475 join_id, join_from, ref);
476 ack_nack_bool = FALSE;
477
478 } else if ((join_version == NULL)
479 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
480 crm_err("Rejecting join-%d request from node %s because feature set %s"
481 " is incompatible with ours (%s) " CRM_XS " ref=%s",
482 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
483 CRM_FEATURE_SET, ref);
484 ack_nack_bool = FALSE;
485
486 } else if (max_generation_xml == NULL) {
487 const char *validation = crm_element_value(generation,
488 PCMK_XA_VALIDATE_WITH);
489
490 if (pcmk__get_schema(validation) == NULL) {
491 crm_err("Rejecting join-%d request from %s (with first CIB "
492 "generation) due to unknown schema version %s "
493 CRM_XS " ref=%s",
494 join_id, join_from, pcmk__s(validation, "(missing)"), ref);
495 ack_nack_bool = FALSE;
496
497 } else {
498 crm_debug("Accepting join-%d request from %s (with first CIB "
499 "generation) " CRM_XS " ref=%s",
500 join_id, join_from, ref);
501 max_generation_xml = pcmk__xml_copy(NULL, generation);
502 pcmk__str_update(&max_generation_from, join_from);
503 }
504
505 } else if ((cmp < 0)
506 || ((cmp == 0)
507 && pcmk__str_eq(join_from, controld_globals.our_nodename,
508 pcmk__str_casei))) {
509 const char *validation = crm_element_value(generation,
510 PCMK_XA_VALIDATE_WITH);
511
512 if (pcmk__get_schema(validation) == NULL) {
513 crm_err("Rejecting join-%d request from %s (with better CIB "
514 "generation than current best from %s) due to unknown "
515 "schema version %s " CRM_XS " ref=%s",
516 join_id, join_from, max_generation_from,
517 pcmk__s(validation, "(missing)"), ref);
518 ack_nack_bool = FALSE;
519
520 } else {
521 crm_debug("Accepting join-%d request from %s (with better CIB "
522 "generation than current best from %s) " CRM_XS " ref=%s",
523 join_id, join_from, max_generation_from, ref);
524 crm_log_xml_debug(max_generation_xml, "Old max generation");
525 crm_log_xml_debug(generation, "New max generation");
526
527 free_xml(max_generation_xml);
528 max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
529 pcmk__str_update(&max_generation_from, join_from);
530 }
531
532 } else {
533 crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
534 join_id, join_from, ref);
535 }
536
537 if (!ack_nack_bool) {
538 if (compare_version(join_version, "3.17.0") < 0) {
539
540
541
542 crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
543 } else {
544 crm_update_peer_join(__func__, join_node, crm_join_nack);
545 }
546 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
547
548 } else {
549 crm_update_peer_join(__func__, join_node, crm_join_integrated);
550 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
551 }
552
553 count = crmd_join_phase_count(crm_join_integrated);
554 crm_debug("%d node%s currently integrated in join-%d",
555 count, pcmk__plural_s(count), join_id);
556
557 if (check_join_state(cur_state, __func__) == FALSE) {
558
559 count = crmd_join_phase_count(crm_join_welcomed);
560 crm_debug("Waiting on join-%d requests from %d outstanding node%s",
561 join_id, count, pcmk__plural_s(count));
562 }
563 }
564
565
566 void
567 do_dc_join_finalize(long long action,
568 enum crmd_fsa_cause cause,
569 enum crmd_fsa_state cur_state,
570 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
571 {
572 char *sync_from = NULL;
573 int rc = pcmk_ok;
574 int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
575 int count_finalizable = crmd_join_phase_count(crm_join_integrated)
576 + crmd_join_phase_count(crm_join_nack)
577 + crmd_join_phase_count(crm_join_nack_quiet);
578
579
580
581
582 if (count_welcomed != 0) {
583 crm_debug("Waiting on join-%d requests from %d outstanding node%s "
584 "before finalizing join", current_join_id, count_welcomed,
585 pcmk__plural_s(count_welcomed));
586 crmd_join_phase_log(LOG_DEBUG);
587
588 return;
589
590 } else if (count_finalizable == 0) {
591 crm_debug("Finalization not needed for join-%d at the current time",
592 current_join_id);
593 crmd_join_phase_log(LOG_DEBUG);
594 check_join_state(controld_globals.fsa_state, __func__);
595 return;
596 }
597
598 controld_clear_fsa_input_flags(R_HAVE_CIB);
599 if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
600 pcmk__str_null_matches|pcmk__str_casei)) {
601 controld_set_fsa_input_flags(R_HAVE_CIB);
602 }
603
604 if (!controld_globals.transition_graph->complete) {
605 crm_warn("Delaying join-%d finalization while transition in progress",
606 current_join_id);
607 crmd_join_phase_log(LOG_DEBUG);
608 crmd_fsa_stall(FALSE);
609 return;
610 }
611
612 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
613
614 sync_from = pcmk__str_copy(controld_globals.our_nodename);
615 crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
616 current_join_id, count_finalizable,
617 pcmk__plural_s(count_finalizable));
618 crm_log_xml_debug(max_generation_xml, "Requested CIB version");
619
620 } else {
621
622 sync_from = pcmk__str_copy(max_generation_from);
623 crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
624 current_join_id, count_finalizable,
625 pcmk__plural_s(count_finalizable), sync_from);
626 crm_log_xml_notice(max_generation_xml, "Requested CIB version");
627 }
628 crmd_join_phase_log(LOG_DEBUG);
629
630 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
631 sync_from, NULL, cib_none);
632 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
633 }
634
635 void
636 free_max_generation(void)
637 {
638 free(max_generation_from);
639 max_generation_from = NULL;
640
641 free_xml(max_generation_xml);
642 max_generation_xml = NULL;
643 }
644
645 void
646 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
647 {
648 CRM_LOG_ASSERT(-EPERM != rc);
649
650 if (rc != pcmk_ok) {
651 const char *sync_from = (const char *) user_data;
652
653 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
654 "Could not sync CIB from %s in join-%d: %s",
655 sync_from, current_join_id, pcmk_strerror(rc));
656
657 if (rc != -pcmk_err_old_data) {
658 record_failed_sync_node(sync_from, current_join_id);
659 }
660
661
662 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
663 __func__);
664
665 } else if (!AM_I_DC) {
666 crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
667
668 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
669 crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
670 "(%s)", current_join_id,
671 fsa_state2string(controld_globals.fsa_state));
672
673 } else {
674 controld_set_fsa_input_flags(R_HAVE_CIB);
675
676
677 if (!check_join_state(controld_globals.fsa_state, __func__)) {
678 int count_finalizable = 0;
679
680 count_finalizable = crmd_join_phase_count(crm_join_integrated)
681 + crmd_join_phase_count(crm_join_nack)
682 + crmd_join_phase_count(crm_join_nack_quiet);
683
684 crm_debug("Notifying %d node%s of join-%d results",
685 count_finalizable, pcmk__plural_s(count_finalizable),
686 current_join_id);
687 g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
688 }
689 }
690 }
691
692 static void
693 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
694 xmlNode *output, void *user_data)
695 {
696 const char *node = user_data;
697
698 if (rc != pcmk_ok) {
699 fsa_data_t *msg_data = NULL;
700
701 crm_crit("join-%d node history update (via CIB call %d) for node %s "
702 "failed: %s",
703 current_join_id, call_id, node, pcmk_strerror(rc));
704 crm_log_xml_debug(msg, "failed");
705 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
706 }
707
708 crm_debug("join-%d node history update (via CIB call %d) for node %s "
709 "complete",
710 current_join_id, call_id, node);
711 check_join_state(controld_globals.fsa_state, __func__);
712 }
713
714
715 void
716 do_dc_join_ack(long long action,
717 enum crmd_fsa_cause cause,
718 enum crmd_fsa_state cur_state,
719 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
720 {
721 int join_id = -1;
722 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
723
724 const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
725 char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
726 crm_node_t *peer = NULL;
727
728 enum controld_section_e section = controld_section_lrm;
729 char *xpath = NULL;
730 xmlNode *state = join_ack->xml;
731 xmlNode *execd_state = NULL;
732
733 cib_t *cib = controld_globals.cib_conn;
734 int rc = pcmk_ok;
735
736
737 if (join_from == NULL) {
738 crm_warn("Ignoring message received without node identification");
739 goto done;
740 }
741 if (op == NULL) {
742 crm_warn("Ignoring message received from %s without task", join_from);
743 goto done;
744 }
745
746 if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
747 crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
748 op, join_from, CRM_OP_JOIN_CONFIRM);
749 goto done;
750 }
751
752 if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
753 crm_warn("Ignoring join confirmation from %s without valid join ID",
754 join_from);
755 goto done;
756 }
757
758 peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
759 if (peer->join != crm_join_finalized) {
760 crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
761 "(currently %s not %s)",
762 join_id, join_from, crm_join_phase_str(peer->join),
763 crm_join_phase_str(crm_join_finalized));
764 goto done;
765 }
766
767 if (join_id != current_join_id) {
768 crm_err("Rejecting join-%d confirmation from %s "
769 "because currently on join-%d",
770 join_id, join_from, current_join_id);
771 crm_update_peer_join(__func__, peer, crm_join_nack);
772 goto done;
773 }
774
775 crm_update_peer_join(__func__, peer, crm_join_confirmed);
776
777
778
779
780
781
782 rc = cib->cmds->init_transaction(cib);
783 if (rc != pcmk_ok) {
784 goto done;
785 }
786
787
788 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
789 section = controld_section_lrm_unlocked;
790 }
791 controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
792
793 rc = cib->cmds->remove(cib, xpath, NULL,
794 cib_scope_local
795 |cib_xpath
796 |cib_multiple
797 |cib_transaction);
798 if (rc != pcmk_ok) {
799 goto done;
800 }
801
802
803 if (pcmk__str_eq(join_from, controld_globals.our_nodename,
804 pcmk__str_casei)) {
805
806
807 execd_state = controld_query_executor_state();
808
809 if (execd_state != NULL) {
810 crm_debug("Updating local node history for join-%d from query "
811 "result",
812 current_join_id);
813 state = execd_state;
814
815 } else {
816 crm_warn("Updating local node history from join-%d confirmation "
817 "because query failed",
818 current_join_id);
819 }
820
821 } else {
822 crm_debug("Updating node history for %s from join-%d confirmation",
823 join_from, current_join_id);
824 }
825
826 rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
827 cib_scope_local|cib_can_create|cib_transaction);
828 free_xml(execd_state);
829 if (rc != pcmk_ok) {
830 goto done;
831 }
832
833
834 rc = cib->cmds->end_transaction(cib, true, cib_scope_local);
835 fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
836
837 if (rc > 0) {
838
839 join_from = NULL;
840 rc = pcmk_ok;
841 }
842
843 done:
844 if (rc != pcmk_ok) {
845 crm_crit("join-%d node history update for node %s failed: %s",
846 current_join_id, join_from, pcmk_strerror(rc));
847 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
848 }
849 free(join_from);
850 free(xpath);
851 }
852
853 void
854 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
855 {
856 xmlNode *acknak = NULL;
857 xmlNode *tmp1 = NULL;
858 crm_node_t *join_node = value;
859 const char *join_to = join_node->uname;
860 bool integrated = false;
861
862 switch (join_node->join) {
863 case crm_join_integrated:
864 integrated = true;
865 break;
866 case crm_join_nack:
867 case crm_join_nack_quiet:
868 break;
869 default:
870 crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
871 "for join-%d", join_to,
872 crm_join_phase_str(join_node->join), current_join_id);
873 return;
874 }
875
876
877
878
879 crm_trace("Updating node name and UUID in CIB for %s", join_to);
880 tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
881 crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
882 crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
883 fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
884 free_xml(tmp1);
885
886 if (join_node->join == crm_join_nack_quiet) {
887 crm_trace("Not sending nack message to node %s with feature set older "
888 "than 3.17.0", join_to);
889 return;
890 }
891
892 join_node = pcmk__get_node(0, join_to, NULL,
893 pcmk__node_search_cluster_member);
894 if (!pcmk__cluster_is_node_active(join_node)) {
895
896
897
898
899
900
901
902
903
904 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
905 return;
906 }
907
908
909 crm_debug("%sing join-%d request from %s",
910 integrated? "Acknowledg" : "Nack", current_join_id, join_to);
911 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
912 pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
913
914 if (integrated) {
915
916 crm_update_peer_join(__func__, join_node, crm_join_finalized);
917 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
918
919
920
921
922
923 if (pcmk__cluster_num_remote_nodes() > 0) {
924 GHashTableIter iter;
925 crm_node_t *node = NULL;
926 xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
927
928 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
929 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
930 xmlNode *remote = NULL;
931
932 if (!node->conn_host) {
933 continue;
934 }
935
936 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
937 pcmk__xe_set_props(remote,
938 PCMK_XA_ID, node->uname,
939 PCMK__XA_NODE_STATE, node->state,
940 PCMK__XA_CONNECTION_HOST, node->conn_host,
941 NULL);
942 }
943 }
944 }
945 pcmk__cluster_send_message(join_node, crm_msg_crmd, acknak);
946 free_xml(acknak);
947 return;
948 }
949
950 gboolean
951 check_join_state(enum crmd_fsa_state cur_state, const char *source)
952 {
953 static unsigned long long highest_seq = 0;
954
955 if (controld_globals.membership_id != crm_peer_seq) {
956 crm_debug("join-%d: Membership changed from %llu to %llu "
957 CRM_XS " highest=%llu state=%s for=%s",
958 current_join_id, controld_globals.membership_id, crm_peer_seq,
959 highest_seq, fsa_state2string(cur_state), source);
960 if(highest_seq < crm_peer_seq) {
961
962 highest_seq = crm_peer_seq;
963 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
964 }
965
966 } else if (cur_state == S_INTEGRATION) {
967 if (crmd_join_phase_count(crm_join_welcomed) == 0) {
968 int count = crmd_join_phase_count(crm_join_integrated);
969
970 crm_debug("join-%d: Integration of %d peer%s complete "
971 CRM_XS " state=%s for=%s",
972 current_join_id, count, pcmk__plural_s(count),
973 fsa_state2string(cur_state), source);
974 register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
975 return TRUE;
976 }
977
978 } else if (cur_state == S_FINALIZE_JOIN) {
979 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
980 crm_debug("join-%d: Delaying finalization until we have CIB "
981 CRM_XS " state=%s for=%s",
982 current_join_id, fsa_state2string(cur_state), source);
983 return TRUE;
984
985 } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
986 int count = crmd_join_phase_count(crm_join_welcomed);
987
988 crm_debug("join-%d: Still waiting on %d welcomed node%s "
989 CRM_XS " state=%s for=%s",
990 current_join_id, count, pcmk__plural_s(count),
991 fsa_state2string(cur_state), source);
992 crmd_join_phase_log(LOG_DEBUG);
993
994 } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
995 int count = crmd_join_phase_count(crm_join_integrated);
996
997 crm_debug("join-%d: Still waiting on %d integrated node%s "
998 CRM_XS " state=%s for=%s",
999 current_join_id, count, pcmk__plural_s(count),
1000 fsa_state2string(cur_state), source);
1001 crmd_join_phase_log(LOG_DEBUG);
1002
1003 } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
1004 int count = crmd_join_phase_count(crm_join_finalized);
1005
1006 crm_debug("join-%d: Still waiting on %d finalized node%s "
1007 CRM_XS " state=%s for=%s",
1008 current_join_id, count, pcmk__plural_s(count),
1009 fsa_state2string(cur_state), source);
1010 crmd_join_phase_log(LOG_DEBUG);
1011
1012 } else {
1013 crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
1014 current_join_id, fsa_state2string(cur_state), source);
1015 register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
1016 return TRUE;
1017 }
1018 }
1019
1020 return FALSE;
1021 }
1022
1023 void
1024 do_dc_join_final(long long action,
1025 enum crmd_fsa_cause cause,
1026 enum crmd_fsa_state cur_state,
1027 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1028 {
1029 crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1030 crm_update_quorum(crm_have_quorum, TRUE);
1031 }
1032
1033 int crmd_join_phase_count(enum crm_join_phase phase)
1034 {
1035 int count = 0;
1036 crm_node_t *peer;
1037 GHashTableIter iter;
1038
1039 g_hash_table_iter_init(&iter, crm_peer_cache);
1040 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1041 if(peer->join == phase) {
1042 count++;
1043 }
1044 }
1045 return count;
1046 }
1047
1048 void crmd_join_phase_log(int level)
1049 {
1050 crm_node_t *peer;
1051 GHashTableIter iter;
1052
1053 g_hash_table_iter_init(&iter, crm_peer_cache);
1054 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1055 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
1056 crm_join_phase_str(peer->join));
1057 }
1058 }