This source file includes the following definitions.
- controld_destroy_failed_sync_table
- controld_remove_failed_sync_node
- record_failed_sync_node
- lookup_failed_sync_node
- crm_update_peer_join
- start_join_round
- create_dc_message
- join_make_offer
- do_dc_join_offer_all
- do_dc_join_offer_one
- compare_int_fields
- do_dc_join_filter_offer
- do_dc_join_finalize
- free_max_generation
- finalize_sync_callback
- join_node_state_commit_callback
- do_dc_join_ack
- finalize_join_for
- check_join_state
- do_dc_join_final
- crmd_join_phase_count
- crmd_join_phase_log
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <crm/crm.h>
13
14 #include <crm/common/xml.h>
15 #include <crm/cluster.h>
16
17 #include <pacemaker-controld.h>
18
/* Best CIB generation seen so far in the current join round
 * (max_generation_xml) and the name of the node that offered it
 * (max_generation_from). Both are set by do_dc_join_filter_offer() and
 * released by start_join_round() and free_max_generation(). */
19 static char *max_generation_from = NULL;
20 static xmlNodePtr max_generation_xml = NULL;
21
22
23
24
25
26
27
28
29
30
31
/* Lazily created table mapping a node name to the ID of the join round in
 * which syncing the CIB from that node failed (see
 * record_failed_sync_node() and lookup_failed_sync_node()). */
32 static GHashTable *failed_sync_nodes = NULL;
33
/* Forward declarations for functions that are used in this file before
 * they are defined */
34 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
35 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
36 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
37
38
39
40
/* ID of the current join round; incremented by do_dc_join_offer_all() each
 * time a new round is started, and sent in every DC join message */
41 static int current_join_id = 0;
42
43
44
45
46
47 void
48 controld_destroy_failed_sync_table(void)
49 {
50 if (failed_sync_nodes != NULL) {
51 g_hash_table_destroy(failed_sync_nodes);
52 failed_sync_nodes = NULL;
53 }
54 }
55
56
57
58
59
60
61
62 void
63 controld_remove_failed_sync_node(const char *node_name)
64 {
65 if (failed_sync_nodes != NULL) {
66 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
67 }
68 }
69
70
71
72
73
74
75
76
77 static void
78 record_failed_sync_node(const char *node_name, gint join_id)
79 {
80 if (failed_sync_nodes == NULL) {
81 failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
82 }
83
84
85
86
87 CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
88 GINT_TO_POINTER(join_id)));
89 }
90
91
92
93
94
95
96
97
98
99
100
101
102 static int
103 lookup_failed_sync_node(const char *node_name, gint *join_id)
104 {
105 *join_id = -1;
106
107 if (failed_sync_nodes != NULL) {
108 gpointer result = g_hash_table_lookup(failed_sync_nodes,
109 (gchar *) node_name);
110 if (result != NULL) {
111 *join_id = GPOINTER_TO_INT(result);
112 return pcmk_rc_ok;
113 }
114 }
115 return pcmk_rc_node_unknown;
116 }
117
118 void
119 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
120 {
121 enum crm_join_phase last = 0;
122
123 CRM_CHECK(node != NULL, return);
124
125
126 if (pcmk_is_set(node->flags, crm_remote_node)) {
127 return;
128 }
129
130 last = node->join;
131
132 if(phase == last) {
133 crm_trace("Node %s join-%d phase is still %s "
134 CRM_XS " nodeid=%u source=%s",
135 node->uname, current_join_id, crm_join_phase_str(last),
136 node->id, source);
137
138 } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
139 node->join = phase;
140 crm_trace("Node %s join-%d phase is now %s (was %s) "
141 CRM_XS " nodeid=%u source=%s",
142 node->uname, current_join_id, crm_join_phase_str(phase),
143 crm_join_phase_str(last), node->id, source);
144
145 } else {
146 crm_warn("Rejecting join-%d phase update for node %s because "
147 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
148 current_join_id, node->uname, crm_join_phase_str(last),
149 crm_join_phase_str(phase), node->id, source);
150 }
151 }
152
153 static void
154 start_join_round(void)
155 {
156 GHashTableIter iter;
157 crm_node_t *peer = NULL;
158
159 crm_debug("Starting new join round join-%d", current_join_id);
160
161 g_hash_table_iter_init(&iter, crm_peer_cache);
162 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
163 crm_update_peer_join(__func__, peer, crm_join_none);
164 }
165 if (max_generation_from != NULL) {
166 free(max_generation_from);
167 max_generation_from = NULL;
168 }
169 if (max_generation_xml != NULL) {
170 free_xml(max_generation_xml);
171 max_generation_xml = NULL;
172 }
173 controld_clear_fsa_input_flags(R_HAVE_CIB);
174 }
175
176
177
178
179
180
181
182
183 static xmlNode *
184 create_dc_message(const char *join_op, const char *host_to)
185 {
186 xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
187 CRM_SYSTEM_DC, NULL);
188
189
190 crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
191
192
193
194
195 pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
196 pcmk_is_set(controld_globals.fsa_input_register,
197 R_SHUTDOWN));
198 return msg;
199 }
200
201 static void
202 join_make_offer(gpointer key, gpointer value, gpointer user_data)
203 {
204 xmlNode *offer = NULL;
205 crm_node_t *member = (crm_node_t *)value;
206
207 CRM_ASSERT(member != NULL);
208 if (!pcmk__cluster_is_node_active(member)) {
209 crm_info("Not making join-%d offer to inactive node %s",
210 current_join_id,
211 (member->uname? member->uname : "with unknown name"));
212 if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
213
214
215
216
217
218
219
220
221
222 pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
223 }
224 return;
225 }
226
227 if (member->uname == NULL) {
228 crm_info("Not making join-%d offer to node uuid %s with unknown name",
229 current_join_id, member->uuid);
230 return;
231 }
232
233 if (controld_globals.membership_id != crm_peer_seq) {
234 controld_globals.membership_id = crm_peer_seq;
235 crm_info("Making join-%d offers based on membership event %llu",
236 current_join_id, crm_peer_seq);
237 }
238
239 if(user_data && member->join > crm_join_none) {
240 crm_info("Not making join-%d offer to already known node %s (%s)",
241 current_join_id, member->uname,
242 crm_join_phase_str(member->join));
243 return;
244 }
245
246 crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
247
248 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
249
250
251 crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
252
253 crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
254 pcmk__cluster_send_message(member, crm_msg_crmd, offer);
255 free_xml(offer);
256
257 crm_update_peer_join(__func__, member, crm_join_welcomed);
258 }
259
260
261 void
262 do_dc_join_offer_all(long long action,
263 enum crmd_fsa_cause cause,
264 enum crmd_fsa_state cur_state,
265 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
266 {
267 int count;
268
269
270
271
272
273 current_join_id++;
274 start_join_round();
275
276 update_dc(NULL);
277 if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
278 crm_info("A new node joined the cluster");
279 }
280 g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
281
282 count = crmd_join_phase_count(crm_join_welcomed);
283 crm_info("Waiting on join-%d requests from %d outstanding node%s",
284 current_join_id, count, pcmk__plural_s(count));
285
286
287 }
288
289
290 void
291 do_dc_join_offer_one(long long action,
292 enum crmd_fsa_cause cause,
293 enum crmd_fsa_state cur_state,
294 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
295 {
296 crm_node_t *member;
297 ha_msg_input_t *welcome = NULL;
298 int count;
299 const char *join_to = NULL;
300
301 if (msg_data->data == NULL) {
302 crm_info("Making join-%d offers to any unconfirmed nodes "
303 "because an unknown node joined", current_join_id);
304 g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
305 check_join_state(cur_state, __func__);
306 return;
307 }
308
309 welcome = fsa_typed_data(fsa_dt_ha_msg);
310 if (welcome == NULL) {
311
312 return;
313 }
314
315 join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
316 if (join_to == NULL) {
317 crm_err("Can't make join-%d offer to unknown node", current_join_id);
318 return;
319 }
320 member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
321
322
323
324
325
326
327 crm_update_peer_join(__func__, member, crm_join_none);
328 join_make_offer(NULL, member, NULL);
329
330
331
332
333 if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
334 member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
335 pcmk__node_search_cluster_member);
336 join_make_offer(NULL, member, NULL);
337 }
338
339
340
341
342 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
343 NULL);
344
345 count = crmd_join_phase_count(crm_join_welcomed);
346 crm_info("Waiting on join-%d requests from %d outstanding node%s",
347 current_join_id, count, pcmk__plural_s(count));
348
349
350 }
351
352 static int
353 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
354 {
355 const char *elem_l = crm_element_value(left, field);
356 const char *elem_r = crm_element_value(right, field);
357
358 long long int_elem_l;
359 long long int_elem_r;
360
361 pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
362 pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
363
364 if (int_elem_l < int_elem_r) {
365 return -1;
366
367 } else if (int_elem_l > int_elem_r) {
368 return 1;
369 }
370
371 return 0;
372 }
373
374
375 void
376 do_dc_join_filter_offer(long long action,
377 enum crmd_fsa_cause cause,
378 enum crmd_fsa_state cur_state,
379 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
380 {
381 xmlNode *generation = NULL;
382
383 int cmp = 0;
384 int join_id = -1;
385 int count = 0;
386 gint value = 0;
387 gboolean ack_nack_bool = TRUE;
388 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
389
390 const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
391 const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
392 const char *join_version = crm_element_value(join_ack->msg,
393 PCMK_XA_CRM_FEATURE_SET);
394 crm_node_t *join_node = NULL;
395
396 if (join_from == NULL) {
397 crm_err("Ignoring invalid join request without node name");
398 return;
399 }
400 join_node = pcmk__get_node(0, join_from, NULL,
401 pcmk__node_search_cluster_member);
402
403 crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
404 if (join_id != current_join_id) {
405 crm_debug("Ignoring join-%d request from %s because we are on join-%d",
406 join_id, join_from, current_join_id);
407 check_join_state(cur_state, __func__);
408 return;
409 }
410
411 generation = join_ack->xml;
412 if (max_generation_xml != NULL && generation != NULL) {
413 int lpc = 0;
414
415 const char *attributes[] = {
416 PCMK_XA_ADMIN_EPOCH,
417 PCMK_XA_EPOCH,
418 PCMK_XA_NUM_UPDATES,
419 };
420
421
422
423
424 if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
425 for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
426 cmp = compare_int_fields(max_generation_xml, generation,
427 attributes[lpc]);
428 }
429
430 } else {
431 CRM_LOG_ASSERT(false);
432 }
433 }
434
435 if (ref == NULL) {
436 ref = "none";
437 }
438
439 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
440 crm_err("Rejecting join-%d request from node %s because we failed to "
441 "sync its CIB in join-%d " CRM_XS " ref=%s",
442 join_id, join_from, value, ref);
443 ack_nack_bool = FALSE;
444
445 } else if (!pcmk__cluster_is_node_active(join_node)) {
446 if (match_down_event(join_from) != NULL) {
447
448
449
450
451
452
453 crm_debug("Rejecting join-%d request from inactive node %s "
454 CRM_XS " ref=%s", join_id, join_from, ref);
455 } else {
456 crm_err("Rejecting join-%d request from inactive node %s "
457 CRM_XS " ref=%s", join_id, join_from, ref);
458 }
459 ack_nack_bool = FALSE;
460
461 } else if (generation == NULL) {
462 crm_err("Rejecting invalid join-%d request from node %s "
463 "missing CIB generation " CRM_XS " ref=%s",
464 join_id, join_from, ref);
465 ack_nack_bool = FALSE;
466
467 } else if ((join_version == NULL)
468 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
469 crm_err("Rejecting join-%d request from node %s because feature set %s"
470 " is incompatible with ours (%s) " CRM_XS " ref=%s",
471 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
472 CRM_FEATURE_SET, ref);
473 ack_nack_bool = FALSE;
474
475 } else if (max_generation_xml == NULL) {
476 const char *validation = crm_element_value(generation,
477 PCMK_XA_VALIDATE_WITH);
478
479 if (pcmk__get_schema(validation) == NULL) {
480 crm_err("Rejecting join-%d request from %s (with first CIB "
481 "generation) due to unknown schema version %s "
482 CRM_XS " ref=%s",
483 join_id, join_from, pcmk__s(validation, "(missing)"), ref);
484 ack_nack_bool = FALSE;
485
486 } else {
487 crm_debug("Accepting join-%d request from %s (with first CIB "
488 "generation) " CRM_XS " ref=%s",
489 join_id, join_from, ref);
490 max_generation_xml = pcmk__xml_copy(NULL, generation);
491 pcmk__str_update(&max_generation_from, join_from);
492 }
493
494 } else if ((cmp < 0)
495 || ((cmp == 0)
496 && pcmk__str_eq(join_from, controld_globals.our_nodename,
497 pcmk__str_casei))) {
498 const char *validation = crm_element_value(generation,
499 PCMK_XA_VALIDATE_WITH);
500
501 if (pcmk__get_schema(validation) == NULL) {
502 crm_err("Rejecting join-%d request from %s (with better CIB "
503 "generation than current best from %s) due to unknown "
504 "schema version %s " CRM_XS " ref=%s",
505 join_id, join_from, max_generation_from,
506 pcmk__s(validation, "(missing)"), ref);
507 ack_nack_bool = FALSE;
508
509 } else {
510 crm_debug("Accepting join-%d request from %s (with better CIB "
511 "generation than current best from %s) " CRM_XS " ref=%s",
512 join_id, join_from, max_generation_from, ref);
513 crm_log_xml_debug(max_generation_xml, "Old max generation");
514 crm_log_xml_debug(generation, "New max generation");
515
516 free_xml(max_generation_xml);
517 max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
518 pcmk__str_update(&max_generation_from, join_from);
519 }
520
521 } else {
522 crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
523 join_id, join_from, ref);
524 }
525
526 if (!ack_nack_bool) {
527 if (compare_version(join_version, "3.17.0") < 0) {
528
529
530
531 crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
532 } else {
533 crm_update_peer_join(__func__, join_node, crm_join_nack);
534 }
535 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
536
537 } else {
538 crm_update_peer_join(__func__, join_node, crm_join_integrated);
539 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
540 }
541
542 count = crmd_join_phase_count(crm_join_integrated);
543 crm_debug("%d node%s currently integrated in join-%d",
544 count, pcmk__plural_s(count), join_id);
545
546 if (check_join_state(cur_state, __func__) == FALSE) {
547
548 count = crmd_join_phase_count(crm_join_welcomed);
549 crm_debug("Waiting on join-%d requests from %d outstanding node%s",
550 join_id, count, pcmk__plural_s(count));
551 }
552 }
553
554
555 void
556 do_dc_join_finalize(long long action,
557 enum crmd_fsa_cause cause,
558 enum crmd_fsa_state cur_state,
559 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
560 {
561 char *sync_from = NULL;
562 int rc = pcmk_ok;
563 int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
564 int count_finalizable = crmd_join_phase_count(crm_join_integrated)
565 + crmd_join_phase_count(crm_join_nack)
566 + crmd_join_phase_count(crm_join_nack_quiet);
567
568
569
570
571 if (count_welcomed != 0) {
572 crm_debug("Waiting on join-%d requests from %d outstanding node%s "
573 "before finalizing join", current_join_id, count_welcomed,
574 pcmk__plural_s(count_welcomed));
575 crmd_join_phase_log(LOG_DEBUG);
576
577 return;
578
579 } else if (count_finalizable == 0) {
580 crm_debug("Finalization not needed for join-%d at the current time",
581 current_join_id);
582 crmd_join_phase_log(LOG_DEBUG);
583 check_join_state(controld_globals.fsa_state, __func__);
584 return;
585 }
586
587 controld_clear_fsa_input_flags(R_HAVE_CIB);
588 if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
589 pcmk__str_null_matches|pcmk__str_casei)) {
590 controld_set_fsa_input_flags(R_HAVE_CIB);
591 }
592
593 if (!controld_globals.transition_graph->complete) {
594 crm_warn("Delaying join-%d finalization while transition in progress",
595 current_join_id);
596 crmd_join_phase_log(LOG_DEBUG);
597 crmd_fsa_stall(FALSE);
598 return;
599 }
600
601 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
602
603 sync_from = pcmk__str_copy(controld_globals.our_nodename);
604 crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
605 current_join_id, count_finalizable,
606 pcmk__plural_s(count_finalizable));
607 crm_log_xml_debug(max_generation_xml, "Requested CIB version");
608
609 } else {
610
611 sync_from = pcmk__str_copy(max_generation_from);
612 crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
613 current_join_id, count_finalizable,
614 pcmk__plural_s(count_finalizable), sync_from);
615 crm_log_xml_notice(max_generation_xml, "Requested CIB version");
616 }
617 crmd_join_phase_log(LOG_DEBUG);
618
619 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
620 sync_from, NULL, cib_none);
621 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
622 }
623
624 void
625 free_max_generation(void)
626 {
627 free(max_generation_from);
628 max_generation_from = NULL;
629
630 free_xml(max_generation_xml);
631 max_generation_xml = NULL;
632 }
633
634 void
635 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
636 {
637 CRM_LOG_ASSERT(-EPERM != rc);
638
639 if (rc != pcmk_ok) {
640 const char *sync_from = (const char *) user_data;
641
642 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
643 "Could not sync CIB from %s in join-%d: %s",
644 sync_from, current_join_id, pcmk_strerror(rc));
645
646 if (rc != -pcmk_err_old_data) {
647 record_failed_sync_node(sync_from, current_join_id);
648 }
649
650
651 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
652 __func__);
653
654 } else if (!AM_I_DC) {
655 crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
656
657 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
658 crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
659 "(%s)", current_join_id,
660 fsa_state2string(controld_globals.fsa_state));
661
662 } else {
663 controld_set_fsa_input_flags(R_HAVE_CIB);
664
665
666 if (!check_join_state(controld_globals.fsa_state, __func__)) {
667 int count_finalizable = 0;
668
669 count_finalizable = crmd_join_phase_count(crm_join_integrated)
670 + crmd_join_phase_count(crm_join_nack)
671 + crmd_join_phase_count(crm_join_nack_quiet);
672
673 crm_debug("Notifying %d node%s of join-%d results",
674 count_finalizable, pcmk__plural_s(count_finalizable),
675 current_join_id);
676 g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
677 }
678 }
679 }
680
681 static void
682 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
683 xmlNode *output, void *user_data)
684 {
685 const char *node = user_data;
686
687 if (rc != pcmk_ok) {
688 fsa_data_t *msg_data = NULL;
689
690 crm_crit("join-%d node history update (via CIB call %d) for node %s "
691 "failed: %s",
692 current_join_id, call_id, node, pcmk_strerror(rc));
693 crm_log_xml_debug(msg, "failed");
694 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
695 }
696
697 crm_debug("join-%d node history update (via CIB call %d) for node %s "
698 "complete",
699 current_join_id, call_id, node);
700 check_join_state(controld_globals.fsa_state, __func__);
701 }
702
703
704 void
705 do_dc_join_ack(long long action,
706 enum crmd_fsa_cause cause,
707 enum crmd_fsa_state cur_state,
708 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
709 {
710 int join_id = -1;
711 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
712
713 const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
714 char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
715 crm_node_t *peer = NULL;
716
717 enum controld_section_e section = controld_section_lrm;
718 char *xpath = NULL;
719 xmlNode *state = join_ack->xml;
720 xmlNode *execd_state = NULL;
721
722 cib_t *cib = controld_globals.cib_conn;
723 int rc = pcmk_ok;
724
725
726 if (join_from == NULL) {
727 crm_warn("Ignoring message received without node identification");
728 goto done;
729 }
730 if (op == NULL) {
731 crm_warn("Ignoring message received from %s without task", join_from);
732 goto done;
733 }
734
735 if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
736 crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
737 op, join_from, CRM_OP_JOIN_CONFIRM);
738 goto done;
739 }
740
741 if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
742 crm_warn("Ignoring join confirmation from %s without valid join ID",
743 join_from);
744 goto done;
745 }
746
747 peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
748 if (peer->join != crm_join_finalized) {
749 crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
750 "(currently %s not %s)",
751 join_id, join_from, crm_join_phase_str(peer->join),
752 crm_join_phase_str(crm_join_finalized));
753 goto done;
754 }
755
756 if (join_id != current_join_id) {
757 crm_err("Rejecting join-%d confirmation from %s "
758 "because currently on join-%d",
759 join_id, join_from, current_join_id);
760 crm_update_peer_join(__func__, peer, crm_join_nack);
761 goto done;
762 }
763
764 crm_update_peer_join(__func__, peer, crm_join_confirmed);
765
766
767
768
769
770
771 rc = cib->cmds->init_transaction(cib);
772 if (rc != pcmk_ok) {
773 goto done;
774 }
775
776
777 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
778 section = controld_section_lrm_unlocked;
779 }
780 controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
781
782 rc = cib->cmds->remove(cib, xpath, NULL,
783 cib_scope_local
784 |cib_xpath
785 |cib_multiple
786 |cib_transaction);
787 if (rc != pcmk_ok) {
788 goto done;
789 }
790
791
792 if (pcmk__str_eq(join_from, controld_globals.our_nodename,
793 pcmk__str_casei)) {
794
795
796 execd_state = controld_query_executor_state();
797
798 if (execd_state != NULL) {
799 crm_debug("Updating local node history for join-%d from query "
800 "result",
801 current_join_id);
802 state = execd_state;
803
804 } else {
805 crm_warn("Updating local node history from join-%d confirmation "
806 "because query failed",
807 current_join_id);
808 }
809
810 } else {
811 crm_debug("Updating node history for %s from join-%d confirmation",
812 join_from, current_join_id);
813 }
814
815 rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
816 cib_scope_local|cib_can_create|cib_transaction);
817 free_xml(execd_state);
818 if (rc != pcmk_ok) {
819 goto done;
820 }
821
822
823 rc = cib->cmds->end_transaction(cib, true, cib_scope_local);
824 fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
825
826 if (rc > 0) {
827
828 join_from = NULL;
829 rc = pcmk_ok;
830 }
831
832 done:
833 if (rc != pcmk_ok) {
834 crm_crit("join-%d node history update for node %s failed: %s",
835 current_join_id, join_from, pcmk_strerror(rc));
836 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
837 }
838 free(join_from);
839 free(xpath);
840 }
841
842 void
843 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
844 {
845 xmlNode *acknak = NULL;
846 xmlNode *tmp1 = NULL;
847 crm_node_t *join_node = value;
848 const char *join_to = join_node->uname;
849 bool integrated = false;
850
851 switch (join_node->join) {
852 case crm_join_integrated:
853 integrated = true;
854 break;
855 case crm_join_nack:
856 case crm_join_nack_quiet:
857 break;
858 default:
859 crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
860 "for join-%d", join_to,
861 crm_join_phase_str(join_node->join), current_join_id);
862 return;
863 }
864
865
866
867
868 crm_trace("Updating node name and UUID in CIB for %s", join_to);
869 tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
870 crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
871 crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
872 fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
873 free_xml(tmp1);
874
875 if (join_node->join == crm_join_nack_quiet) {
876 crm_trace("Not sending nack message to node %s with feature set older "
877 "than 3.17.0", join_to);
878 return;
879 }
880
881 join_node = pcmk__get_node(0, join_to, NULL,
882 pcmk__node_search_cluster_member);
883 if (!pcmk__cluster_is_node_active(join_node)) {
884
885
886
887
888
889
890
891
892
893 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
894 return;
895 }
896
897
898 crm_debug("%sing join-%d request from %s",
899 integrated? "Acknowledg" : "Nack", current_join_id, join_to);
900 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
901 pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
902
903 if (integrated) {
904
905 crm_update_peer_join(__func__, join_node, crm_join_finalized);
906 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
907
908
909
910
911
912 if (pcmk__cluster_num_remote_nodes() > 0) {
913 GHashTableIter iter;
914 crm_node_t *node = NULL;
915 xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
916
917 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
918 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
919 xmlNode *remote = NULL;
920
921 if (!node->conn_host) {
922 continue;
923 }
924
925 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
926 pcmk__xe_set_props(remote,
927 PCMK_XA_ID, node->uname,
928 PCMK__XA_NODE_STATE, node->state,
929 PCMK__XA_CONNECTION_HOST, node->conn_host,
930 NULL);
931 }
932 }
933 }
934 pcmk__cluster_send_message(join_node, crm_msg_crmd, acknak);
935 free_xml(acknak);
936 return;
937 }
938
939 gboolean
940 check_join_state(enum crmd_fsa_state cur_state, const char *source)
941 {
942 static unsigned long long highest_seq = 0;
943
944 if (controld_globals.membership_id != crm_peer_seq) {
945 crm_debug("join-%d: Membership changed from %llu to %llu "
946 CRM_XS " highest=%llu state=%s for=%s",
947 current_join_id, controld_globals.membership_id, crm_peer_seq,
948 highest_seq, fsa_state2string(cur_state), source);
949 if(highest_seq < crm_peer_seq) {
950
951 highest_seq = crm_peer_seq;
952 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
953 }
954
955 } else if (cur_state == S_INTEGRATION) {
956 if (crmd_join_phase_count(crm_join_welcomed) == 0) {
957 int count = crmd_join_phase_count(crm_join_integrated);
958
959 crm_debug("join-%d: Integration of %d peer%s complete "
960 CRM_XS " state=%s for=%s",
961 current_join_id, count, pcmk__plural_s(count),
962 fsa_state2string(cur_state), source);
963 register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
964 return TRUE;
965 }
966
967 } else if (cur_state == S_FINALIZE_JOIN) {
968 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
969 crm_debug("join-%d: Delaying finalization until we have CIB "
970 CRM_XS " state=%s for=%s",
971 current_join_id, fsa_state2string(cur_state), source);
972 return TRUE;
973
974 } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
975 int count = crmd_join_phase_count(crm_join_welcomed);
976
977 crm_debug("join-%d: Still waiting on %d welcomed node%s "
978 CRM_XS " state=%s for=%s",
979 current_join_id, count, pcmk__plural_s(count),
980 fsa_state2string(cur_state), source);
981 crmd_join_phase_log(LOG_DEBUG);
982
983 } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
984 int count = crmd_join_phase_count(crm_join_integrated);
985
986 crm_debug("join-%d: Still waiting on %d integrated node%s "
987 CRM_XS " state=%s for=%s",
988 current_join_id, count, pcmk__plural_s(count),
989 fsa_state2string(cur_state), source);
990 crmd_join_phase_log(LOG_DEBUG);
991
992 } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
993 int count = crmd_join_phase_count(crm_join_finalized);
994
995 crm_debug("join-%d: Still waiting on %d finalized node%s "
996 CRM_XS " state=%s for=%s",
997 current_join_id, count, pcmk__plural_s(count),
998 fsa_state2string(cur_state), source);
999 crmd_join_phase_log(LOG_DEBUG);
1000
1001 } else {
1002 crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
1003 current_join_id, fsa_state2string(cur_state), source);
1004 register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
1005 return TRUE;
1006 }
1007 }
1008
1009 return FALSE;
1010 }
1011
1012 void
1013 do_dc_join_final(long long action,
1014 enum crmd_fsa_cause cause,
1015 enum crmd_fsa_state cur_state,
1016 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1017 {
1018 crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1019 crm_update_quorum(crm_have_quorum, TRUE);
1020 }
1021
1022 int crmd_join_phase_count(enum crm_join_phase phase)
1023 {
1024 int count = 0;
1025 crm_node_t *peer;
1026 GHashTableIter iter;
1027
1028 g_hash_table_iter_init(&iter, crm_peer_cache);
1029 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1030 if(peer->join == phase) {
1031 count++;
1032 }
1033 }
1034 return count;
1035 }
1036
1037 void crmd_join_phase_log(int level)
1038 {
1039 crm_node_t *peer;
1040 GHashTableIter iter;
1041
1042 g_hash_table_iter_init(&iter, crm_peer_cache);
1043 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1044 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
1045 crm_join_phase_str(peer->join));
1046 }
1047 }