This source file includes the following definitions.
- join_phase_text
- controld_destroy_failed_sync_table
- controld_remove_failed_sync_node
- record_failed_sync_node
- lookup_failed_sync_node
- crm_update_peer_join
- start_join_round
- create_dc_message
- join_make_offer
- do_dc_join_offer_all
- do_dc_join_offer_one
- compare_int_fields
- do_dc_join_filter_offer
- do_dc_join_finalize
- free_max_generation
- finalize_sync_callback
- join_node_state_commit_callback
- do_dc_join_ack
- finalize_join_for
- check_join_state
- do_dc_join_final
- crmd_join_phase_count
- crmd_join_phase_log
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16
17 #include <glib.h>
18 #include <libxml/tree.h>
19
20 #include <crm/crm.h>
21
22 #include <crm/common/xml.h>
23 #include <crm/cluster.h>
24
25 #include <pacemaker-controld.h>
26
/* Best CIB generation seen so far in the current join round: the name of the
 * node that reported it and a copy of the generation XML. Both are assigned in
 * do_dc_join_filter_offer() and released by start_join_round() and
 * free_max_generation().
 */
27 static char *max_generation_from = NULL;
28 static xmlNodePtr max_generation_xml = NULL;
29
30
31
32
33
34
35
36
37
38
39
/* Table of nodes whose CIB could not be synced, keyed by node name, with the
 * join ID of the failed round stored as the value (see
 * record_failed_sync_node() and lookup_failed_sync_node()). Created lazily.
 */
40 static GHashTable *failed_sync_nodes = NULL;
41
/* Forward declarations for functions that are used before their definitions
 * later in this file.
 */
42 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
43 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
44 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
45
46
47
48
/* Identifier of the current join round; incremented by do_dc_join_offer_all()
 * each time a new round is started and included in every DC join message.
 */
49 static int current_join_id = 0;
50
51
52
53
54
55
56
57
58
59 static const char *
60 join_phase_text(enum controld_join_phase phase)
61 {
62 switch (phase) {
63 case controld_join_nack:
64 return "nack";
65 case controld_join_none:
66 return "none";
67 case controld_join_welcomed:
68 return "welcomed";
69 case controld_join_integrated:
70 return "integrated";
71 case controld_join_finalized:
72 return "finalized";
73 case controld_join_confirmed:
74 return "confirmed";
75 default:
76 return "invalid";
77 }
78 }
79
80
81
82
83
84 void
85 controld_destroy_failed_sync_table(void)
86 {
87 if (failed_sync_nodes != NULL) {
88 g_hash_table_destroy(failed_sync_nodes);
89 failed_sync_nodes = NULL;
90 }
91 }
92
93
94
95
96
97
98
99 void
100 controld_remove_failed_sync_node(const char *node_name)
101 {
102 if (failed_sync_nodes != NULL) {
103 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
104 }
105 }
106
107
108
109
110
111
112
113
114 static void
115 record_failed_sync_node(const char *node_name, gint join_id)
116 {
117 if (failed_sync_nodes == NULL) {
118 failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
119 }
120
121
122
123
124 CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
125 GINT_TO_POINTER(join_id)));
126 }
127
128
129
130
131
132
133
134
135
136
137
138
139 static int
140 lookup_failed_sync_node(const char *node_name, gint *join_id)
141 {
142 *join_id = -1;
143
144 if (failed_sync_nodes != NULL) {
145 gpointer result = g_hash_table_lookup(failed_sync_nodes,
146 (gchar *) node_name);
147 if (result != NULL) {
148 *join_id = GPOINTER_TO_INT(result);
149 return pcmk_rc_ok;
150 }
151 }
152 return pcmk_rc_node_unknown;
153 }
154
155 void
156 crm_update_peer_join(const char *source, pcmk__node_status_t *node,
157 enum controld_join_phase phase)
158 {
159 enum controld_join_phase last = controld_get_join_phase(node);
160
161 CRM_CHECK(node != NULL, return);
162
163
164 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
165 return;
166 }
167
168 if (phase == last) {
169 crm_trace("Node %s join-%d phase is still %s "
170 QB_XS " nodeid=%" PRIu32 " source=%s",
171 node->name, current_join_id, join_phase_text(last),
172 node->cluster_layer_id, source);
173 return;
174 }
175
176 if ((phase <= controld_join_none) || (phase == (last + 1))) {
177 struct controld_node_status_data *data = NULL;
178
179 if (node->user_data == NULL) {
180 node->user_data =
181 pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
182 }
183 data = node->user_data;
184 data->join_phase = phase;
185
186 crm_trace("Node %s join-%d phase is now %s (was %s) "
187 QB_XS " nodeid=%" PRIu32 " source=%s",
188 node->name, current_join_id, join_phase_text(phase),
189 join_phase_text(last), node->cluster_layer_id,
190 source);
191 return;
192 }
193
194 crm_warn("Rejecting join-%d phase update for node %s because can't go from "
195 "%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
196 current_join_id, node->name, join_phase_text(last),
197 join_phase_text(phase), node->cluster_layer_id, source);
198 }
199
200 static void
201 start_join_round(void)
202 {
203 GHashTableIter iter;
204 pcmk__node_status_t *peer = NULL;
205
206 crm_debug("Starting new join round join-%d", current_join_id);
207
208 g_hash_table_iter_init(&iter, pcmk__peer_cache);
209 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
210 crm_update_peer_join(__func__, peer, controld_join_none);
211 }
212 if (max_generation_from != NULL) {
213 free(max_generation_from);
214 max_generation_from = NULL;
215 }
216 if (max_generation_xml != NULL) {
217 pcmk__xml_free(max_generation_xml);
218 max_generation_xml = NULL;
219 }
220 controld_clear_fsa_input_flags(R_HAVE_CIB);
221 }
222
223
224
225
226
227
228
229
230 static xmlNode *
231 create_dc_message(const char *join_op, const char *host_to)
232 {
233 xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
234 CRM_SYSTEM_CRMD, join_op, NULL);
235
236
237 crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
238
239
240
241
242 pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
243 pcmk_is_set(controld_globals.fsa_input_register,
244 R_SHUTDOWN));
245 return msg;
246 }
247
248 static void
249 join_make_offer(gpointer key, gpointer value, gpointer user_data)
250 {
251
252
253
254 xmlNode *offer = NULL;
255 pcmk__node_status_t *member = (pcmk__node_status_t *) value;
256
257 pcmk__assert(member != NULL);
258 if (!pcmk__cluster_is_node_active(member)) {
259 crm_info("Not making join-%d offer to inactive node %s",
260 current_join_id, pcmk__s(member->name, "with unknown name"));
261 if ((member->expected == NULL)
262 && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
263
264
265
266
267
268
269
270
271
272 pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
273 }
274 return;
275 }
276
277 if (member->name == NULL) {
278 crm_info("Not making join-%d offer to node uuid %s with unknown name",
279 current_join_id, member->xml_id);
280 return;
281 }
282
283 if (controld_globals.membership_id != controld_globals.peer_seq) {
284 controld_globals.membership_id = controld_globals.peer_seq;
285 crm_info("Making join-%d offers based on membership event %llu",
286 current_join_id, controld_globals.peer_seq);
287 }
288
289 if (user_data != NULL) {
290 enum controld_join_phase phase = controld_get_join_phase(member);
291
292 if (phase > controld_join_none) {
293 crm_info("Not making join-%d offer to already known node %s (%s)",
294 current_join_id, member->name, join_phase_text(phase));
295 return;
296 }
297 }
298
299 crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
300 controld_join_none);
301
302 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
303
304
305 crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
306
307 crm_info("Sending join-%d offer to %s", current_join_id, member->name);
308 pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
309 pcmk__xml_free(offer);
310
311 crm_update_peer_join(__func__, member, controld_join_welcomed);
312 }
313
314
315 void
316 do_dc_join_offer_all(long long action,
317 enum crmd_fsa_cause cause,
318 enum crmd_fsa_state cur_state,
319 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
320 {
321 int count;
322
323
324
325
326
327 current_join_id++;
328 start_join_round();
329
330 update_dc(NULL);
331 if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
332 crm_info("A new node joined the cluster");
333 }
334 g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
335
336 count = crmd_join_phase_count(controld_join_welcomed);
337 crm_info("Waiting on join-%d requests from %d outstanding node%s",
338 current_join_id, count, pcmk__plural_s(count));
339
340
341 }
342
343
344 void
345 do_dc_join_offer_one(long long action,
346 enum crmd_fsa_cause cause,
347 enum crmd_fsa_state cur_state,
348 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
349 {
350 pcmk__node_status_t *member = NULL;
351 ha_msg_input_t *welcome = NULL;
352 int count;
353 const char *join_to = NULL;
354
355 if (msg_data->data == NULL) {
356 crm_info("Making join-%d offers to any unconfirmed nodes "
357 "because an unknown node joined", current_join_id);
358 g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
359 check_join_state(cur_state, __func__);
360 return;
361 }
362
363 welcome = fsa_typed_data(fsa_dt_ha_msg);
364 if (welcome == NULL) {
365
366 return;
367 }
368
369 join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
370 if (join_to == NULL) {
371 crm_err("Can't make join-%d offer to unknown node", current_join_id);
372 return;
373 }
374 member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
375
376
377
378
379
380
381 crm_update_peer_join(__func__, member, controld_join_none);
382 join_make_offer(NULL, member, NULL);
383
384
385
386
387 if (!controld_is_local_node(join_to)) {
388 member = controld_get_local_node_status();
389 join_make_offer(NULL, member, NULL);
390 }
391
392
393
394
395 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
396 NULL);
397
398 count = crmd_join_phase_count(controld_join_welcomed);
399 crm_info("Waiting on join-%d requests from %d outstanding node%s",
400 current_join_id, count, pcmk__plural_s(count));
401
402
403 }
404
405 static int
406 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
407 {
408 const char *elem_l = crm_element_value(left, field);
409 const char *elem_r = crm_element_value(right, field);
410
411 long long int_elem_l;
412 long long int_elem_r;
413
414 int rc = pcmk_rc_ok;
415
416 rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
417 if (rc != pcmk_rc_ok) {
418 crm_warn("Comparing current CIB %s as -1 "
419 "because '%s' is not an integer", field, elem_l);
420 }
421
422 rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
423 if (rc != pcmk_rc_ok) {
424 crm_warn("Comparing joining node's CIB %s as -1 "
425 "because '%s' is not an integer", field, elem_r);
426 }
427
428 if (int_elem_l < int_elem_r) {
429 return -1;
430
431 } else if (int_elem_l > int_elem_r) {
432 return 1;
433 }
434
435 return 0;
436 }
437
438
439 void
440 do_dc_join_filter_offer(long long action,
441 enum crmd_fsa_cause cause,
442 enum crmd_fsa_state cur_state,
443 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
444 {
445 xmlNode *generation = NULL;
446
447 int cmp = 0;
448 int join_id = -1;
449 int count = 0;
450 gint value = 0;
451 gboolean ack_nack_bool = TRUE;
452 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
453
454 const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
455 const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
456 const char *join_version = crm_element_value(join_ack->msg,
457 PCMK_XA_CRM_FEATURE_SET);
458 pcmk__node_status_t *join_node = NULL;
459
460 if (join_from == NULL) {
461 crm_err("Ignoring invalid join request without node name");
462 return;
463 }
464 join_node = pcmk__get_node(0, join_from, NULL,
465 pcmk__node_search_cluster_member);
466
467 crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
468 if (join_id != current_join_id) {
469 crm_debug("Ignoring join-%d request from %s because we are on join-%d",
470 join_id, join_from, current_join_id);
471 check_join_state(cur_state, __func__);
472 return;
473 }
474
475 generation = join_ack->xml;
476 if (max_generation_xml != NULL && generation != NULL) {
477 int lpc = 0;
478
479 const char *attributes[] = {
480 PCMK_XA_ADMIN_EPOCH,
481 PCMK_XA_EPOCH,
482 PCMK_XA_NUM_UPDATES,
483 };
484
485
486
487
488 if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
489 for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
490 cmp = compare_int_fields(max_generation_xml, generation,
491 attributes[lpc]);
492 }
493
494 } else {
495 CRM_LOG_ASSERT(false);
496 }
497 }
498
499 if (ref == NULL) {
500 ref = "none";
501 }
502
503 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
504 crm_err("Rejecting join-%d request from node %s because we failed to "
505 "sync its CIB in join-%d " QB_XS " ref=%s",
506 join_id, join_from, value, ref);
507 ack_nack_bool = FALSE;
508
509 } else if (!pcmk__cluster_is_node_active(join_node)) {
510 if (match_down_event(join_from) != NULL) {
511
512
513
514
515
516
517 crm_debug("Rejecting join-%d request from inactive node %s "
518 QB_XS " ref=%s", join_id, join_from, ref);
519 } else {
520 crm_err("Rejecting join-%d request from inactive node %s "
521 QB_XS " ref=%s", join_id, join_from, ref);
522 }
523 ack_nack_bool = FALSE;
524
525 } else if (generation == NULL) {
526 crm_err("Rejecting invalid join-%d request from node %s "
527 "missing CIB generation " QB_XS " ref=%s",
528 join_id, join_from, ref);
529 ack_nack_bool = FALSE;
530
531 } else if ((join_version == NULL)
532 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
533 crm_err("Rejecting join-%d request from node %s because feature set %s"
534 " is incompatible with ours (%s) " QB_XS " ref=%s",
535 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
536 CRM_FEATURE_SET, ref);
537 ack_nack_bool = FALSE;
538
539 } else if (max_generation_xml == NULL) {
540 const char *validation = crm_element_value(generation,
541 PCMK_XA_VALIDATE_WITH);
542
543 if (pcmk__get_schema(validation) == NULL) {
544 crm_err("Rejecting join-%d request from %s (with first CIB "
545 "generation) due to %s schema version %s " QB_XS " ref=%s",
546 join_id, join_from,
547 ((validation == NULL)? "missing" : "unknown"),
548 pcmk__s(validation, ""), ref);
549 ack_nack_bool = FALSE;
550
551 } else {
552 crm_debug("Accepting join-%d request from %s (with first CIB "
553 "generation) " QB_XS " ref=%s",
554 join_id, join_from, ref);
555 max_generation_xml = pcmk__xml_copy(NULL, generation);
556 pcmk__str_update(&max_generation_from, join_from);
557 }
558
559 } else if ((cmp < 0)
560 || ((cmp == 0) && controld_is_local_node(join_from))) {
561 const char *validation = crm_element_value(generation,
562 PCMK_XA_VALIDATE_WITH);
563
564 if (pcmk__get_schema(validation) == NULL) {
565 crm_err("Rejecting join-%d request from %s (with better CIB "
566 "generation than current best from %s) due to %s "
567 "schema version %s " QB_XS " ref=%s",
568 join_id, join_from, max_generation_from,
569 ((validation == NULL)? "missing" : "unknown"),
570 pcmk__s(validation, ""), ref);
571 ack_nack_bool = FALSE;
572
573 } else {
574 crm_debug("Accepting join-%d request from %s (with better CIB "
575 "generation than current best from %s) " QB_XS " ref=%s",
576 join_id, join_from, max_generation_from, ref);
577 crm_log_xml_debug(max_generation_xml, "Old max generation");
578 crm_log_xml_debug(generation, "New max generation");
579
580 pcmk__xml_free(max_generation_xml);
581 max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
582 pcmk__str_update(&max_generation_from, join_from);
583 }
584
585 } else {
586 crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
587 join_id, join_from, ref);
588 }
589
590 if (!ack_nack_bool) {
591 crm_update_peer_join(__func__, join_node, controld_join_nack);
592 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
593
594 } else {
595 crm_update_peer_join(__func__, join_node, controld_join_integrated);
596 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
597 }
598
599 count = crmd_join_phase_count(controld_join_integrated);
600 crm_debug("%d node%s currently integrated in join-%d",
601 count, pcmk__plural_s(count), join_id);
602
603 if (check_join_state(cur_state, __func__) == FALSE) {
604
605 count = crmd_join_phase_count(controld_join_welcomed);
606 crm_debug("Waiting on join-%d requests from %d outstanding node%s",
607 join_id, count, pcmk__plural_s(count));
608 }
609 }
610
611
612 void
613 do_dc_join_finalize(long long action,
614 enum crmd_fsa_cause cause,
615 enum crmd_fsa_state cur_state,
616 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
617 {
618 char *sync_from = NULL;
619 int rc = pcmk_ok;
620 int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
621 int count_finalizable = crmd_join_phase_count(controld_join_integrated)
622 + crmd_join_phase_count(controld_join_nack);
623
624
625
626
627 if (count_welcomed != 0) {
628 crm_debug("Waiting on join-%d requests from %d outstanding node%s "
629 "before finalizing join", current_join_id, count_welcomed,
630 pcmk__plural_s(count_welcomed));
631 crmd_join_phase_log(LOG_DEBUG);
632
633 return;
634
635 } else if (count_finalizable == 0) {
636 crm_debug("Finalization not needed for join-%d at the current time",
637 current_join_id);
638 crmd_join_phase_log(LOG_DEBUG);
639 check_join_state(controld_globals.fsa_state, __func__);
640 return;
641 }
642
643 controld_clear_fsa_input_flags(R_HAVE_CIB);
644 if ((max_generation_from == NULL)
645 || controld_is_local_node(max_generation_from)) {
646 controld_set_fsa_input_flags(R_HAVE_CIB);
647 }
648
649 if (!controld_globals.transition_graph->complete) {
650 crm_warn("Delaying join-%d finalization while transition in progress",
651 current_join_id);
652 crmd_join_phase_log(LOG_DEBUG);
653 crmd_fsa_stall(FALSE);
654 return;
655 }
656
657 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
658
659 sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
660 crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
661 current_join_id, count_finalizable,
662 pcmk__plural_s(count_finalizable));
663 crm_log_xml_debug(max_generation_xml, "Requested CIB version");
664
665 } else {
666
667 sync_from = pcmk__str_copy(max_generation_from);
668 crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
669 current_join_id, count_finalizable,
670 pcmk__plural_s(count_finalizable), sync_from);
671 crm_log_xml_notice(max_generation_xml, "Requested CIB version");
672 }
673 crmd_join_phase_log(LOG_DEBUG);
674
675 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
676 sync_from, NULL, cib_none);
677 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
678 }
679
680 void
681 free_max_generation(void)
682 {
683 free(max_generation_from);
684 max_generation_from = NULL;
685
686 pcmk__xml_free(max_generation_xml);
687 max_generation_xml = NULL;
688 }
689
690 void
691 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
692 {
693 CRM_LOG_ASSERT(-EPERM != rc);
694
695 if (rc != pcmk_ok) {
696 const char *sync_from = (const char *) user_data;
697
698 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
699 "Could not sync CIB from %s in join-%d: %s",
700 sync_from, current_join_id, pcmk_strerror(rc));
701
702 if (rc != -pcmk_err_old_data) {
703 record_failed_sync_node(sync_from, current_join_id);
704 }
705
706
707 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
708 __func__);
709
710 } else if (!AM_I_DC) {
711 crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
712
713 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
714 crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
715 "(%s)", current_join_id,
716 fsa_state2string(controld_globals.fsa_state));
717
718 } else {
719 controld_set_fsa_input_flags(R_HAVE_CIB);
720
721
722 if (!check_join_state(controld_globals.fsa_state, __func__)) {
723 int count_finalizable = 0;
724
725 count_finalizable = crmd_join_phase_count(controld_join_integrated)
726 + crmd_join_phase_count(controld_join_nack);
727
728 crm_debug("Notifying %d node%s of join-%d results",
729 count_finalizable, pcmk__plural_s(count_finalizable),
730 current_join_id);
731 g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
732 }
733 }
734 }
735
736 static void
737 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
738 xmlNode *output, void *user_data)
739 {
740 const char *node = user_data;
741
742 if (rc != pcmk_ok) {
743 fsa_data_t *msg_data = NULL;
744
745 crm_crit("join-%d node history update (via CIB call %d) for node %s "
746 "failed: %s",
747 current_join_id, call_id, node, pcmk_strerror(rc));
748 crm_log_xml_debug(msg, "failed");
749 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
750 }
751
752 crm_debug("join-%d node history update (via CIB call %d) for node %s "
753 "complete",
754 current_join_id, call_id, node);
755 check_join_state(controld_globals.fsa_state, __func__);
756 }
757
758
759 void
760 do_dc_join_ack(long long action,
761 enum crmd_fsa_cause cause,
762 enum crmd_fsa_state cur_state,
763 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
764 {
765 int join_id = -1;
766 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
767
768 const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
769 char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
770 pcmk__node_status_t *peer = NULL;
771 enum controld_join_phase phase = controld_join_none;
772
773 enum controld_section_e section = controld_section_lrm;
774 char *xpath = NULL;
775 xmlNode *state = join_ack->xml;
776 xmlNode *execd_state = NULL;
777
778 cib_t *cib = controld_globals.cib_conn;
779 int rc = pcmk_ok;
780
781
782 if (join_from == NULL) {
783 crm_warn("Ignoring message received without node identification");
784 goto done;
785 }
786 if (op == NULL) {
787 crm_warn("Ignoring message received from %s without task", join_from);
788 goto done;
789 }
790
791 if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
792 crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
793 op, join_from, CRM_OP_JOIN_CONFIRM);
794 goto done;
795 }
796
797 if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
798 crm_warn("Ignoring join confirmation from %s without valid join ID",
799 join_from);
800 goto done;
801 }
802
803 peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
804 phase = controld_get_join_phase(peer);
805 if (phase != controld_join_finalized) {
806 crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
807 "(currently %s not %s)",
808 join_id, join_from, join_phase_text(phase),
809 join_phase_text(controld_join_finalized));
810 goto done;
811 }
812
813 if (join_id != current_join_id) {
814 crm_err("Rejecting join-%d confirmation from %s "
815 "because currently on join-%d",
816 join_id, join_from, current_join_id);
817 crm_update_peer_join(__func__, peer, controld_join_nack);
818 goto done;
819 }
820
821 crm_update_peer_join(__func__, peer, controld_join_confirmed);
822
823
824
825
826
827
828 rc = cib->cmds->init_transaction(cib);
829 if (rc != pcmk_ok) {
830 goto done;
831 }
832
833
834 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
835 section = controld_section_lrm_unlocked;
836 }
837 controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
838
839 rc = cib->cmds->remove(cib, xpath, NULL,
840 cib_xpath|cib_multiple|cib_transaction);
841 if (rc != pcmk_ok) {
842 goto done;
843 }
844
845
846 if (controld_is_local_node(join_from)) {
847
848
849 execd_state = controld_query_executor_state();
850
851 if (execd_state != NULL) {
852 crm_debug("Updating local node history for join-%d from query "
853 "result",
854 current_join_id);
855 state = execd_state;
856
857 } else {
858 crm_warn("Updating local node history from join-%d confirmation "
859 "because query failed",
860 current_join_id);
861 }
862
863 } else {
864 crm_debug("Updating node history for %s from join-%d confirmation",
865 join_from, current_join_id);
866 }
867
868 rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
869 cib_can_create|cib_transaction);
870 pcmk__xml_free(execd_state);
871 if (rc != pcmk_ok) {
872 goto done;
873 }
874
875
876 rc = cib->cmds->end_transaction(cib, true, cib_none);
877 fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
878
879 if (rc > 0) {
880
881 join_from = NULL;
882 rc = pcmk_ok;
883 }
884
885 done:
886 if (rc != pcmk_ok) {
887 crm_crit("join-%d node history update for node %s failed: %s",
888 current_join_id, join_from, pcmk_strerror(rc));
889 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
890 }
891 free(join_from);
892 free(xpath);
893 }
894
895 void
896 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
897 {
898 xmlNode *acknak = NULL;
899 xmlNode *tmp1 = NULL;
900 pcmk__node_status_t *join_node = value;
901 const char *join_to = join_node->name;
902 enum controld_join_phase phase = controld_get_join_phase(join_node);
903 bool integrated = false;
904
905 switch (phase) {
906 case controld_join_integrated:
907 integrated = true;
908 break;
909 case controld_join_nack:
910 break;
911 default:
912 crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
913 "for join-%d",
914 join_to, join_phase_text(phase), current_join_id);
915 return;
916 }
917
918
919
920
921 crm_trace("Updating node name and UUID in CIB for %s", join_to);
922 tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
923 crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
924 crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
925 fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
926 pcmk__xml_free(tmp1);
927
928 join_node = pcmk__get_node(0, join_to, NULL,
929 pcmk__node_search_cluster_member);
930 if (!pcmk__cluster_is_node_active(join_node)) {
931
932
933
934
935
936
937
938
939
940 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
941 return;
942 }
943
944
945 crm_debug("%sing join-%d request from %s",
946 integrated? "Acknowledg" : "Nack", current_join_id, join_to);
947 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
948 pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
949
950 if (integrated) {
951
952 crm_update_peer_join(__func__, join_node, controld_join_finalized);
953 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
954
955
956
957
958
959 if (pcmk__cluster_num_remote_nodes() > 0) {
960 GHashTableIter iter;
961 pcmk__node_status_t *node = NULL;
962 xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
963
964 g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
965 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
966 xmlNode *remote = NULL;
967
968 if (!node->conn_host) {
969 continue;
970 }
971
972 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
973 pcmk__xe_set_props(remote,
974 PCMK_XA_ID, node->name,
975 PCMK__XA_NODE_STATE, node->state,
976 PCMK__XA_CONNECTION_HOST, node->conn_host,
977 NULL);
978 }
979 }
980 }
981 pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
982 pcmk__xml_free(acknak);
983 return;
984 }
985
986 gboolean
987 check_join_state(enum crmd_fsa_state cur_state, const char *source)
988 {
989 static unsigned long long highest_seq = 0;
990
991 if (controld_globals.membership_id != controld_globals.peer_seq) {
992 crm_debug("join-%d: Membership changed from %llu to %llu "
993 QB_XS " highest=%llu state=%s for=%s",
994 current_join_id, controld_globals.membership_id,
995 controld_globals.peer_seq, highest_seq,
996 fsa_state2string(cur_state), source);
997 if (highest_seq < controld_globals.peer_seq) {
998
999 highest_seq = controld_globals.peer_seq;
1000 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
1001 }
1002
1003 } else if (cur_state == S_INTEGRATION) {
1004 if (crmd_join_phase_count(controld_join_welcomed) == 0) {
1005 int count = crmd_join_phase_count(controld_join_integrated);
1006
1007 crm_debug("join-%d: Integration of %d peer%s complete "
1008 QB_XS " state=%s for=%s",
1009 current_join_id, count, pcmk__plural_s(count),
1010 fsa_state2string(cur_state), source);
1011 register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
1012 return TRUE;
1013 }
1014
1015 } else if (cur_state == S_FINALIZE_JOIN) {
1016 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
1017 crm_debug("join-%d: Delaying finalization until we have CIB "
1018 QB_XS " state=%s for=%s",
1019 current_join_id, fsa_state2string(cur_state), source);
1020 return TRUE;
1021
1022 } else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
1023 int count = crmd_join_phase_count(controld_join_welcomed);
1024
1025 crm_debug("join-%d: Still waiting on %d welcomed node%s "
1026 QB_XS " state=%s for=%s",
1027 current_join_id, count, pcmk__plural_s(count),
1028 fsa_state2string(cur_state), source);
1029 crmd_join_phase_log(LOG_DEBUG);
1030
1031 } else if (crmd_join_phase_count(controld_join_integrated) != 0) {
1032 int count = crmd_join_phase_count(controld_join_integrated);
1033
1034 crm_debug("join-%d: Still waiting on %d integrated node%s "
1035 QB_XS " state=%s for=%s",
1036 current_join_id, count, pcmk__plural_s(count),
1037 fsa_state2string(cur_state), source);
1038 crmd_join_phase_log(LOG_DEBUG);
1039
1040 } else if (crmd_join_phase_count(controld_join_finalized) != 0) {
1041 int count = crmd_join_phase_count(controld_join_finalized);
1042
1043 crm_debug("join-%d: Still waiting on %d finalized node%s "
1044 QB_XS " state=%s for=%s",
1045 current_join_id, count, pcmk__plural_s(count),
1046 fsa_state2string(cur_state), source);
1047 crmd_join_phase_log(LOG_DEBUG);
1048
1049 } else {
1050 crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
1051 current_join_id, fsa_state2string(cur_state), source);
1052 register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
1053 return TRUE;
1054 }
1055 }
1056
1057 return FALSE;
1058 }
1059
1060 void
1061 do_dc_join_final(long long action,
1062 enum crmd_fsa_cause cause,
1063 enum crmd_fsa_state cur_state,
1064 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1065 {
1066 crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1067 crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
1068 }
1069
1070 int crmd_join_phase_count(enum controld_join_phase phase)
1071 {
1072 int count = 0;
1073 pcmk__node_status_t *peer;
1074 GHashTableIter iter;
1075
1076 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1077 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1078 if (controld_get_join_phase(peer) == phase) {
1079 count++;
1080 }
1081 }
1082 return count;
1083 }
1084
1085 void crmd_join_phase_log(int level)
1086 {
1087 pcmk__node_status_t *peer;
1088 GHashTableIter iter;
1089
1090 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1091 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1092 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
1093 join_phase_text(controld_get_join_phase(peer)));
1094 }
1095 }