This source file includes the following definitions.
- join_phase_text
- controld_destroy_failed_sync_table
- controld_remove_failed_sync_node
- record_failed_sync_node
- lookup_failed_sync_node
- crm_update_peer_join
- start_join_round
- create_dc_message
- join_make_offer
- do_dc_join_offer_all
- do_dc_join_offer_one
- compare_int_fields
- do_dc_join_filter_offer
- do_dc_join_finalize
- free_max_generation
- finalize_sync_callback
- join_node_state_commit_callback
- do_dc_join_ack
- finalize_join_for
- check_join_state
- do_dc_join_final
- crmd_join_phase_count
- crmd_join_phase_log
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16
17 #include <glib.h>
18 #include <libxml/tree.h>
19
20 #include <crm/crm.h>
21
22 #include <crm/common/xml.h>
23 #include <crm/cluster.h>
24
25 #include <pacemaker-controld.h>
26
27 static char *max_generation_from = NULL;
28 static xmlNodePtr max_generation_xml = NULL;
29
30
31
32
33
34
35
36
37
38
39
40 static GHashTable *failed_sync_nodes = NULL;
41
42 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
43 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
44 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
45
46
47
48
49 static int current_join_id = 0;
50
51
52
53
54
55
56
57
58
59 static const char *
60 join_phase_text(enum controld_join_phase phase)
61 {
62 switch (phase) {
63 case controld_join_nack:
64 return "nack";
65 case controld_join_none:
66 return "none";
67 case controld_join_welcomed:
68 return "welcomed";
69 case controld_join_integrated:
70 return "integrated";
71 case controld_join_finalized:
72 return "finalized";
73 case controld_join_confirmed:
74 return "confirmed";
75 default:
76 return "invalid";
77 }
78 }
79
80
81
82
83
84 void
85 controld_destroy_failed_sync_table(void)
86 {
87 if (failed_sync_nodes != NULL) {
88 g_hash_table_destroy(failed_sync_nodes);
89 failed_sync_nodes = NULL;
90 }
91 }
92
93
94
95
96
97
98
99 void
100 controld_remove_failed_sync_node(const char *node_name)
101 {
102 if (failed_sync_nodes != NULL) {
103 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
104 }
105 }
106
107
108
109
110
111
112
113
114 static void
115 record_failed_sync_node(const char *node_name, gint join_id)
116 {
117 if (failed_sync_nodes == NULL) {
118 failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
119 }
120
121
122
123
124 CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
125 GINT_TO_POINTER(join_id)));
126 }
127
128
129
130
131
132
133
134
135
136
137
138
139 static int
140 lookup_failed_sync_node(const char *node_name, gint *join_id)
141 {
142 *join_id = -1;
143
144 if (failed_sync_nodes != NULL) {
145 gpointer result = g_hash_table_lookup(failed_sync_nodes,
146 (gchar *) node_name);
147 if (result != NULL) {
148 *join_id = GPOINTER_TO_INT(result);
149 return pcmk_rc_ok;
150 }
151 }
152 return pcmk_rc_node_unknown;
153 }
154
155 void
156 crm_update_peer_join(const char *source, pcmk__node_status_t *node,
157 enum controld_join_phase phase)
158 {
159 enum controld_join_phase last = controld_get_join_phase(node);
160
161 CRM_CHECK(node != NULL, return);
162
163
164 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
165 return;
166 }
167
168 if (phase == last) {
169 crm_trace("Node %s join-%d phase is still %s "
170 QB_XS " nodeid=%" PRIu32 " source=%s",
171 node->name, current_join_id, join_phase_text(last),
172 node->cluster_layer_id, source);
173 return;
174 }
175
176 if ((phase <= controld_join_none) || (phase == (last + 1))) {
177 struct controld_node_status_data *data = NULL;
178
179 if (node->user_data == NULL) {
180 node->user_data =
181 pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
182 }
183 data = node->user_data;
184 data->join_phase = phase;
185
186 crm_trace("Node %s join-%d phase is now %s (was %s) "
187 QB_XS " nodeid=%" PRIu32 " source=%s",
188 node->name, current_join_id, join_phase_text(phase),
189 join_phase_text(last), node->cluster_layer_id,
190 source);
191 return;
192 }
193
194 crm_warn("Rejecting join-%d phase update for node %s because can't go from "
195 "%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
196 current_join_id, node->name, join_phase_text(last),
197 join_phase_text(phase), node->cluster_layer_id, source);
198 }
199
200 static void
201 start_join_round(void)
202 {
203 GHashTableIter iter;
204 pcmk__node_status_t *peer = NULL;
205
206 crm_debug("Starting new join round join-%d", current_join_id);
207
208 g_hash_table_iter_init(&iter, pcmk__peer_cache);
209 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
210 crm_update_peer_join(__func__, peer, controld_join_none);
211 }
212 if (max_generation_from != NULL) {
213 free(max_generation_from);
214 max_generation_from = NULL;
215 }
216 if (max_generation_xml != NULL) {
217 pcmk__xml_free(max_generation_xml);
218 max_generation_xml = NULL;
219 }
220 controld_clear_fsa_input_flags(R_HAVE_CIB);
221 }
222
223
224
225
226
227
228
229
230 static xmlNode *
231 create_dc_message(const char *join_op, const char *host_to)
232 {
233 xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
234 CRM_SYSTEM_CRMD, join_op, NULL);
235
236
237 crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
238
239
240
241
242 pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
243 pcmk_is_set(controld_globals.fsa_input_register,
244 R_SHUTDOWN));
245 return msg;
246 }
247
248 static void
249 join_make_offer(gpointer key, gpointer value, gpointer user_data)
250 {
251
252
253
254 xmlNode *offer = NULL;
255 pcmk__node_status_t *member = (pcmk__node_status_t *) value;
256
257 pcmk__assert(member != NULL);
258 if (!pcmk__cluster_is_node_active(member)) {
259 crm_info("Not making join-%d offer to inactive node %s",
260 current_join_id, pcmk__s(member->name, "with unknown name"));
261 if ((member->expected == NULL)
262 && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
263
264
265
266
267
268
269
270
271
272 pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
273 }
274 return;
275 }
276
277 if (member->name == NULL) {
278 crm_info("Not making join-%d offer to node uuid %s with unknown name",
279 current_join_id, member->xml_id);
280 return;
281 }
282
283 if (controld_globals.membership_id != controld_globals.peer_seq) {
284 controld_globals.membership_id = controld_globals.peer_seq;
285 crm_info("Making join-%d offers based on membership event %llu",
286 current_join_id, controld_globals.peer_seq);
287 }
288
289 if (user_data != NULL) {
290 enum controld_join_phase phase = controld_get_join_phase(member);
291
292 if (phase > controld_join_none) {
293 crm_info("Not making join-%d offer to already known node %s (%s)",
294 current_join_id, member->name, join_phase_text(phase));
295 return;
296 }
297 }
298
299 crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
300 controld_join_none);
301
302 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
303
304
305 crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
306
307 crm_info("Sending join-%d offer to %s", current_join_id, member->name);
308 pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
309 pcmk__xml_free(offer);
310
311 crm_update_peer_join(__func__, member, controld_join_welcomed);
312 }
313
314
315 void
316 do_dc_join_offer_all(long long action,
317 enum crmd_fsa_cause cause,
318 enum crmd_fsa_state cur_state,
319 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
320 {
321 int count;
322
323
324
325
326
327 current_join_id++;
328 start_join_round();
329
330 update_dc(NULL);
331 if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
332 crm_info("A new node joined the cluster");
333 }
334 g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
335
336 count = crmd_join_phase_count(controld_join_welcomed);
337 crm_info("Waiting on join-%d requests from %d outstanding node%s",
338 current_join_id, count, pcmk__plural_s(count));
339
340
341 }
342
343
344 void
345 do_dc_join_offer_one(long long action,
346 enum crmd_fsa_cause cause,
347 enum crmd_fsa_state cur_state,
348 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
349 {
350 pcmk__node_status_t *member = NULL;
351 ha_msg_input_t *welcome = NULL;
352 int count;
353 const char *join_to = NULL;
354
355 if (msg_data->data == NULL) {
356 crm_info("Making join-%d offers to any unconfirmed nodes "
357 "because an unknown node joined", current_join_id);
358 g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
359 check_join_state(cur_state, __func__);
360 return;
361 }
362
363 welcome = fsa_typed_data(fsa_dt_ha_msg);
364 if (welcome == NULL) {
365
366 return;
367 }
368
369 join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
370 if (join_to == NULL) {
371 crm_err("Can't make join-%d offer to unknown node", current_join_id);
372 return;
373 }
374 member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
375
376
377
378
379
380
381 crm_update_peer_join(__func__, member, controld_join_none);
382 join_make_offer(NULL, member, NULL);
383
384
385
386
387 if (!controld_is_local_node(join_to)) {
388 member = controld_get_local_node_status();
389 join_make_offer(NULL, member, NULL);
390 }
391
392
393
394
395 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
396 NULL);
397
398 count = crmd_join_phase_count(controld_join_welcomed);
399 crm_info("Waiting on join-%d requests from %d outstanding node%s",
400 current_join_id, count, pcmk__plural_s(count));
401
402
403 }
404
405 static int
406 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
407 {
408 const char *elem_l = crm_element_value(left, field);
409 const char *elem_r = crm_element_value(right, field);
410
411 long long int_elem_l;
412 long long int_elem_r;
413
414 int rc = pcmk_rc_ok;
415
416 rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
417 if (rc != pcmk_rc_ok) {
418 crm_warn("Comparing current CIB %s as -1 "
419 "because '%s' is not an integer", field, elem_l);
420 }
421
422 rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
423 if (rc != pcmk_rc_ok) {
424 crm_warn("Comparing joining node's CIB %s as -1 "
425 "because '%s' is not an integer", field, elem_r);
426 }
427
428 if (int_elem_l < int_elem_r) {
429 return -1;
430
431 } else if (int_elem_l > int_elem_r) {
432 return 1;
433 }
434
435 return 0;
436 }
437
438
439 void
440 do_dc_join_filter_offer(long long action,
441 enum crmd_fsa_cause cause,
442 enum crmd_fsa_state cur_state,
443 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
444 {
445 xmlNode *generation = NULL;
446
447 int cmp = 0;
448 int join_id = -1;
449 int count = 0;
450 gint value = 0;
451 gboolean ack_nack_bool = TRUE;
452 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
453
454 const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
455 const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
456 const char *join_version = crm_element_value(join_ack->msg,
457 PCMK_XA_CRM_FEATURE_SET);
458 pcmk__node_status_t *join_node = NULL;
459
460 if (join_from == NULL) {
461 crm_err("Ignoring invalid join request without node name");
462 return;
463 }
464 join_node = pcmk__get_node(0, join_from, NULL,
465 pcmk__node_search_cluster_member);
466
467 crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
468 if (join_id != current_join_id) {
469 crm_debug("Ignoring join-%d request from %s because we are on join-%d",
470 join_id, join_from, current_join_id);
471 check_join_state(cur_state, __func__);
472 return;
473 }
474
475 generation = join_ack->xml;
476 if (max_generation_xml != NULL && generation != NULL) {
477 int lpc = 0;
478
479 const char *attributes[] = {
480 PCMK_XA_ADMIN_EPOCH,
481 PCMK_XA_EPOCH,
482 PCMK_XA_NUM_UPDATES,
483 };
484
485
486
487
488 if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
489 for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
490 cmp = compare_int_fields(max_generation_xml, generation,
491 attributes[lpc]);
492 }
493
494 } else {
495 CRM_LOG_ASSERT(false);
496 }
497 }
498
499 if (ref == NULL) {
500 ref = "none";
501 }
502
503 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
504 crm_err("Rejecting join-%d request from node %s because we failed to "
505 "sync its CIB in join-%d " QB_XS " ref=%s",
506 join_id, join_from, value, ref);
507 ack_nack_bool = FALSE;
508
509 } else if (!pcmk__cluster_is_node_active(join_node)) {
510 if (match_down_event(join_from) != NULL) {
511
512
513
514
515
516
517 crm_debug("Rejecting join-%d request from inactive node %s "
518 QB_XS " ref=%s", join_id, join_from, ref);
519 } else {
520 crm_err("Rejecting join-%d request from inactive node %s "
521 QB_XS " ref=%s", join_id, join_from, ref);
522 }
523 ack_nack_bool = FALSE;
524
525 } else if (generation == NULL) {
526 crm_err("Rejecting invalid join-%d request from node %s "
527 "missing CIB generation " QB_XS " ref=%s",
528 join_id, join_from, ref);
529 ack_nack_bool = FALSE;
530
531 } else if ((join_version == NULL)
532 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
533 crm_err("Rejecting join-%d request from node %s because feature set %s"
534 " is incompatible with ours (%s) " QB_XS " ref=%s",
535 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
536 CRM_FEATURE_SET, ref);
537 ack_nack_bool = FALSE;
538
539 } else if (max_generation_xml == NULL) {
540 const char *validation = crm_element_value(generation,
541 PCMK_XA_VALIDATE_WITH);
542
543 if (pcmk__get_schema(validation) == NULL) {
544 crm_err("Rejecting join-%d request from %s (with first CIB "
545 "generation) due to %s schema version %s " QB_XS " ref=%s",
546 join_id, join_from,
547 ((validation == NULL)? "missing" : "unknown"),
548 pcmk__s(validation, ""), ref);
549 ack_nack_bool = FALSE;
550
551 } else {
552 crm_debug("Accepting join-%d request from %s (with first CIB "
553 "generation) " QB_XS " ref=%s",
554 join_id, join_from, ref);
555 max_generation_xml = pcmk__xml_copy(NULL, generation);
556 pcmk__str_update(&max_generation_from, join_from);
557 }
558
559 } else if ((cmp < 0)
560 || ((cmp == 0) && controld_is_local_node(join_from))) {
561 const char *validation = crm_element_value(generation,
562 PCMK_XA_VALIDATE_WITH);
563
564 if (pcmk__get_schema(validation) == NULL) {
565 crm_err("Rejecting join-%d request from %s (with better CIB "
566 "generation than current best from %s) due to %s "
567 "schema version %s " QB_XS " ref=%s",
568 join_id, join_from, max_generation_from,
569 ((validation == NULL)? "missing" : "unknown"),
570 pcmk__s(validation, ""), ref);
571 ack_nack_bool = FALSE;
572
573 } else {
574 crm_debug("Accepting join-%d request from %s (with better CIB "
575 "generation than current best from %s) " QB_XS " ref=%s",
576 join_id, join_from, max_generation_from, ref);
577 crm_log_xml_debug(max_generation_xml, "Old max generation");
578 crm_log_xml_debug(generation, "New max generation");
579
580 pcmk__xml_free(max_generation_xml);
581 max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
582 pcmk__str_update(&max_generation_from, join_from);
583 }
584
585 } else {
586 crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
587 join_id, join_from, ref);
588 }
589
590 if (!ack_nack_bool) {
591 crm_update_peer_join(__func__, join_node, controld_join_nack);
592 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
593
594 } else {
595 crm_update_peer_join(__func__, join_node, controld_join_integrated);
596 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
597 }
598
599 count = crmd_join_phase_count(controld_join_integrated);
600 crm_debug("%d node%s currently integrated in join-%d",
601 count, pcmk__plural_s(count), join_id);
602
603 if (check_join_state(cur_state, __func__) == FALSE) {
604
605 count = crmd_join_phase_count(controld_join_welcomed);
606 crm_debug("Waiting on join-%d requests from %d outstanding node%s",
607 join_id, count, pcmk__plural_s(count));
608 }
609 }
610
611
612 void
613 do_dc_join_finalize(long long action,
614 enum crmd_fsa_cause cause,
615 enum crmd_fsa_state cur_state,
616 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
617 {
618 char *sync_from = NULL;
619 int rc = pcmk_ok;
620 int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
621 int count_finalizable = crmd_join_phase_count(controld_join_integrated)
622 + crmd_join_phase_count(controld_join_nack);
623
624
625
626
627 if (count_welcomed != 0) {
628 crm_debug("Waiting on join-%d requests from %d outstanding node%s "
629 "before finalizing join", current_join_id, count_welcomed,
630 pcmk__plural_s(count_welcomed));
631 crmd_join_phase_log(LOG_DEBUG);
632
633 return;
634
635 } else if (count_finalizable == 0) {
636 crm_debug("Finalization not needed for join-%d at the current time",
637 current_join_id);
638 crmd_join_phase_log(LOG_DEBUG);
639 check_join_state(controld_globals.fsa_state, __func__);
640 return;
641 }
642
643 controld_clear_fsa_input_flags(R_HAVE_CIB);
644 if ((max_generation_from == NULL)
645 || controld_is_local_node(max_generation_from)) {
646 controld_set_fsa_input_flags(R_HAVE_CIB);
647 }
648
649 if (!controld_globals.transition_graph->complete) {
650 crm_warn("Delaying join-%d finalization while transition in progress",
651 current_join_id);
652 crmd_join_phase_log(LOG_DEBUG);
653 crmd_fsa_stall(FALSE);
654 return;
655 }
656
657 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
658
659 sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
660 } else {
661
662 sync_from = pcmk__str_copy(max_generation_from);
663 }
664 crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s "
665 "with schema %s and feature set %s from %s)",
666 current_join_id, count_finalizable,
667 pcmk__plural_s(count_finalizable),
668 crm_element_value(max_generation_xml, PCMK_XA_ADMIN_EPOCH),
669 crm_element_value(max_generation_xml, PCMK_XA_EPOCH),
670 crm_element_value(max_generation_xml, PCMK_XA_NUM_UPDATES),
671 crm_element_value(max_generation_xml, PCMK_XA_VALIDATE_WITH),
672 crm_element_value(max_generation_xml, PCMK_XA_CRM_FEATURE_SET),
673 sync_from);
674 crmd_join_phase_log(LOG_DEBUG);
675
676 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
677 sync_from, NULL, cib_none);
678 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
679 }
680
681 void
682 free_max_generation(void)
683 {
684 free(max_generation_from);
685 max_generation_from = NULL;
686
687 pcmk__xml_free(max_generation_xml);
688 max_generation_xml = NULL;
689 }
690
691 void
692 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
693 {
694 CRM_LOG_ASSERT(-EPERM != rc);
695
696 if (rc != pcmk_ok) {
697 const char *sync_from = (const char *) user_data;
698
699 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
700 "Could not sync CIB from %s in join-%d: %s",
701 sync_from, current_join_id, pcmk_strerror(rc));
702
703 if (rc != -pcmk_err_old_data) {
704 record_failed_sync_node(sync_from, current_join_id);
705 }
706
707
708 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
709 __func__);
710
711 } else if (!AM_I_DC) {
712 crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
713
714 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
715 crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
716 "(%s)", current_join_id,
717 fsa_state2string(controld_globals.fsa_state));
718
719 } else {
720 controld_set_fsa_input_flags(R_HAVE_CIB);
721
722
723 if (!check_join_state(controld_globals.fsa_state, __func__)) {
724 int count_finalizable = 0;
725
726 count_finalizable = crmd_join_phase_count(controld_join_integrated)
727 + crmd_join_phase_count(controld_join_nack);
728
729 crm_debug("Notifying %d node%s of join-%d results",
730 count_finalizable, pcmk__plural_s(count_finalizable),
731 current_join_id);
732 g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
733 }
734 }
735 }
736
737 static void
738 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
739 xmlNode *output, void *user_data)
740 {
741 const char *node = user_data;
742
743 if (rc != pcmk_ok) {
744 fsa_data_t *msg_data = NULL;
745
746 crm_crit("join-%d node history update (via CIB call %d) for node %s "
747 "failed: %s",
748 current_join_id, call_id, node, pcmk_strerror(rc));
749 crm_log_xml_debug(msg, "failed");
750 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
751 }
752
753 crm_debug("join-%d node history update (via CIB call %d) for node %s "
754 "complete",
755 current_join_id, call_id, node);
756 check_join_state(controld_globals.fsa_state, __func__);
757 }
758
759
760 void
761 do_dc_join_ack(long long action,
762 enum crmd_fsa_cause cause,
763 enum crmd_fsa_state cur_state,
764 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
765 {
766 int join_id = -1;
767 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
768
769 const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
770 char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
771 pcmk__node_status_t *peer = NULL;
772 enum controld_join_phase phase = controld_join_none;
773
774 enum controld_section_e section = controld_section_lrm;
775 char *xpath = NULL;
776 xmlNode *state = join_ack->xml;
777 xmlNode *execd_state = NULL;
778
779 cib_t *cib = controld_globals.cib_conn;
780 int rc = pcmk_ok;
781
782
783 if (join_from == NULL) {
784 crm_warn("Ignoring message received without node identification");
785 goto done;
786 }
787 if (op == NULL) {
788 crm_warn("Ignoring message received from %s without task", join_from);
789 goto done;
790 }
791
792 if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
793 crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
794 op, join_from, CRM_OP_JOIN_CONFIRM);
795 goto done;
796 }
797
798 if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
799 crm_warn("Ignoring join confirmation from %s without valid join ID",
800 join_from);
801 goto done;
802 }
803
804 peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
805 phase = controld_get_join_phase(peer);
806 if (phase != controld_join_finalized) {
807 crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
808 "(currently %s not %s)",
809 join_id, join_from, join_phase_text(phase),
810 join_phase_text(controld_join_finalized));
811 goto done;
812 }
813
814 if (join_id != current_join_id) {
815 crm_err("Rejecting join-%d confirmation from %s "
816 "because currently on join-%d",
817 join_id, join_from, current_join_id);
818 crm_update_peer_join(__func__, peer, controld_join_nack);
819 goto done;
820 }
821
822 crm_update_peer_join(__func__, peer, controld_join_confirmed);
823
824
825
826
827
828
829 rc = cib->cmds->init_transaction(cib);
830 if (rc != pcmk_ok) {
831 goto done;
832 }
833
834
835 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
836 section = controld_section_lrm_unlocked;
837 }
838 controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
839
840 rc = cib->cmds->remove(cib, xpath, NULL,
841 cib_xpath|cib_multiple|cib_transaction);
842 if (rc != pcmk_ok) {
843 goto done;
844 }
845
846
847 if (controld_is_local_node(join_from)) {
848
849
850 execd_state = controld_query_executor_state();
851
852 if (execd_state != NULL) {
853 crm_debug("Updating local node history for join-%d from query "
854 "result",
855 current_join_id);
856 state = execd_state;
857
858 } else {
859 crm_warn("Updating local node history from join-%d confirmation "
860 "because query failed",
861 current_join_id);
862 }
863
864 } else {
865 crm_debug("Updating node history for %s from join-%d confirmation",
866 join_from, current_join_id);
867 }
868
869 rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
870 cib_can_create|cib_transaction);
871 pcmk__xml_free(execd_state);
872 if (rc != pcmk_ok) {
873 goto done;
874 }
875
876
877 rc = cib->cmds->end_transaction(cib, true, cib_none);
878 fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
879
880 if (rc > 0) {
881
882 join_from = NULL;
883 rc = pcmk_ok;
884 }
885
886 done:
887 if (rc != pcmk_ok) {
888 crm_crit("join-%d node history update for node %s failed: %s",
889 current_join_id, join_from, pcmk_strerror(rc));
890 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
891 }
892 free(join_from);
893 free(xpath);
894 }
895
896 void
897 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
898 {
899 xmlNode *acknak = NULL;
900 xmlNode *tmp1 = NULL;
901 pcmk__node_status_t *join_node = value;
902 const char *join_to = join_node->name;
903 enum controld_join_phase phase = controld_get_join_phase(join_node);
904 bool integrated = false;
905
906 switch (phase) {
907 case controld_join_integrated:
908 integrated = true;
909 break;
910 case controld_join_nack:
911 break;
912 default:
913 crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
914 "for join-%d",
915 join_to, join_phase_text(phase), current_join_id);
916 return;
917 }
918
919
920
921
922 crm_trace("Updating node name and UUID in CIB for %s", join_to);
923 tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
924 crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
925 crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
926 fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
927 pcmk__xml_free(tmp1);
928
929 join_node = pcmk__get_node(0, join_to, NULL,
930 pcmk__node_search_cluster_member);
931 if (!pcmk__cluster_is_node_active(join_node)) {
932
933
934
935
936
937
938
939
940
941 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
942 return;
943 }
944
945
946 crm_debug("%sing join-%d request from %s",
947 integrated? "Acknowledg" : "Nack", current_join_id, join_to);
948 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
949 pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
950
951 if (integrated) {
952
953 crm_update_peer_join(__func__, join_node, controld_join_finalized);
954 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
955
956
957
958
959
960 if (pcmk__cluster_num_remote_nodes() > 0) {
961 GHashTableIter iter;
962 pcmk__node_status_t *node = NULL;
963 xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
964
965 g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
966 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
967 xmlNode *remote = NULL;
968
969 if (!node->conn_host) {
970 continue;
971 }
972
973 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
974 pcmk__xe_set_props(remote,
975 PCMK_XA_ID, node->name,
976 PCMK__XA_NODE_STATE, node->state,
977 PCMK__XA_CONNECTION_HOST, node->conn_host,
978 NULL);
979 }
980 }
981 }
982 pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
983 pcmk__xml_free(acknak);
984 return;
985 }
986
987 gboolean
988 check_join_state(enum crmd_fsa_state cur_state, const char *source)
989 {
990 static unsigned long long highest_seq = 0;
991
992 if (controld_globals.membership_id != controld_globals.peer_seq) {
993 crm_debug("join-%d: Membership changed from %llu to %llu "
994 QB_XS " highest=%llu state=%s for=%s",
995 current_join_id, controld_globals.membership_id,
996 controld_globals.peer_seq, highest_seq,
997 fsa_state2string(cur_state), source);
998 if (highest_seq < controld_globals.peer_seq) {
999
1000 highest_seq = controld_globals.peer_seq;
1001 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
1002 }
1003
1004 } else if (cur_state == S_INTEGRATION) {
1005 if (crmd_join_phase_count(controld_join_welcomed) == 0) {
1006 int count = crmd_join_phase_count(controld_join_integrated);
1007
1008 crm_debug("join-%d: Integration of %d peer%s complete "
1009 QB_XS " state=%s for=%s",
1010 current_join_id, count, pcmk__plural_s(count),
1011 fsa_state2string(cur_state), source);
1012 register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
1013 return TRUE;
1014 }
1015
1016 } else if (cur_state == S_FINALIZE_JOIN) {
1017 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
1018 crm_debug("join-%d: Delaying finalization until we have CIB "
1019 QB_XS " state=%s for=%s",
1020 current_join_id, fsa_state2string(cur_state), source);
1021 return TRUE;
1022
1023 } else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
1024 int count = crmd_join_phase_count(controld_join_welcomed);
1025
1026 crm_debug("join-%d: Still waiting on %d welcomed node%s "
1027 QB_XS " state=%s for=%s",
1028 current_join_id, count, pcmk__plural_s(count),
1029 fsa_state2string(cur_state), source);
1030 crmd_join_phase_log(LOG_DEBUG);
1031
1032 } else if (crmd_join_phase_count(controld_join_integrated) != 0) {
1033 int count = crmd_join_phase_count(controld_join_integrated);
1034
1035 crm_debug("join-%d: Still waiting on %d integrated node%s "
1036 QB_XS " state=%s for=%s",
1037 current_join_id, count, pcmk__plural_s(count),
1038 fsa_state2string(cur_state), source);
1039 crmd_join_phase_log(LOG_DEBUG);
1040
1041 } else if (crmd_join_phase_count(controld_join_finalized) != 0) {
1042 int count = crmd_join_phase_count(controld_join_finalized);
1043
1044 crm_debug("join-%d: Still waiting on %d finalized node%s "
1045 QB_XS " state=%s for=%s",
1046 current_join_id, count, pcmk__plural_s(count),
1047 fsa_state2string(cur_state), source);
1048 crmd_join_phase_log(LOG_DEBUG);
1049
1050 } else {
1051 crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
1052 current_join_id, fsa_state2string(cur_state), source);
1053 register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
1054 return TRUE;
1055 }
1056 }
1057
1058 return FALSE;
1059 }
1060
1061 void
1062 do_dc_join_final(long long action,
1063 enum crmd_fsa_cause cause,
1064 enum crmd_fsa_state cur_state,
1065 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1066 {
1067 crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1068 crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
1069 }
1070
1071 int crmd_join_phase_count(enum controld_join_phase phase)
1072 {
1073 int count = 0;
1074 pcmk__node_status_t *peer;
1075 GHashTableIter iter;
1076
1077 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1078 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1079 if (controld_get_join_phase(peer) == phase) {
1080 count++;
1081 }
1082 }
1083 return count;
1084 }
1085
1086 void crmd_join_phase_log(int level)
1087 {
1088 pcmk__node_status_t *peer;
1089 GHashTableIter iter;
1090
1091 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1092 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1093 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
1094 join_phase_text(controld_get_join_phase(peer)));
1095 }
1096 }