This source file includes the following definitions.
- controld_destroy_failed_sync_table
- controld_remove_failed_sync_node
- record_failed_sync_node
- lookup_failed_sync_node
- crm_update_peer_join
- start_join_round
- create_dc_message
- join_make_offer
- do_dc_join_offer_all
- do_dc_join_offer_one
- compare_int_fields
- do_dc_join_filter_offer
- do_dc_join_finalize
- free_max_generation
- finalize_sync_callback
- join_update_complete_callback
- do_dc_join_ack
- finalize_join_for
- check_join_state
- do_dc_join_final
- crmd_join_phase_count
- crmd_join_phase_log
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <crm/crm.h>
13
14 #include <crm/msg_xml.h>
15 #include <crm/common/xml.h>
16 #include <crm/cluster.h>
17
18 #include <pacemaker-controld.h>
19
/* Name of the node that supplied the best CIB generation accepted so far
 * (heap-allocated; cleared by start_join_round() and free_max_generation())
 */
static char *max_generation_from = NULL;

/* Copy of the best CIB generation XML accepted so far (cleared alongside
 * max_generation_from)
 */
static xmlNodePtr max_generation_xml = NULL;

/* Table mapping a node name to the ID of the join round in which syncing the
 * CIB from that node failed. Created lazily by record_failed_sync_node().
 */
static GHashTable *failed_sync_nodes = NULL;

/* Forward declarations for functions used before their definitions below */
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);

/* ID of the current join round; incremented by do_dc_join_offer_all() each
 * time a new round of offers is made
 */
static int current_join_id = 0;
43
44
45
46
47
48 void
49 controld_destroy_failed_sync_table(void)
50 {
51 if (failed_sync_nodes != NULL) {
52 g_hash_table_destroy(failed_sync_nodes);
53 failed_sync_nodes = NULL;
54 }
55 }
56
57
58
59
60
61
62
63 void
64 controld_remove_failed_sync_node(const char *node_name)
65 {
66 if (failed_sync_nodes != NULL) {
67 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
68 }
69 }
70
71
72
73
74
75
76
77
/*!
 * \brief Record that syncing the CIB from a node failed in a join round
 *
 * \param[in] node_name  Name of the node the CIB could not be synced from
 * \param[in] join_id    ID of the join round in which the sync failed
 *
 * The table is created on first use. Keys are private copies of the node
 * name (released with g_free); values are the join ID packed into a pointer.
 */
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
    if (failed_sync_nodes == NULL) {
        failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
    }

    /* The insertion itself is the asserted expression: a false result means
     * the node was already present in the table.
     * NOTE(review): this relies on CRM_LOG_ASSERT() always evaluating its
     * argument -- confirm against the macro definition.
     */
    CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
                                       GINT_TO_POINTER(join_id)));
}
91
92
93
94
95
96
97
98
99
100
101
102
103 static int
104 lookup_failed_sync_node(const char *node_name, gint *join_id)
105 {
106 *join_id = -1;
107
108 if (failed_sync_nodes != NULL) {
109 gpointer result = g_hash_table_lookup(failed_sync_nodes,
110 (gchar *) node_name);
111 if (result != NULL) {
112 *join_id = GPOINTER_TO_INT(result);
113 return pcmk_rc_ok;
114 }
115 }
116 return pcmk_rc_node_unknown;
117 }
118
119 void
120 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
121 {
122 enum crm_join_phase last = 0;
123
124 CRM_CHECK(node != NULL, return);
125
126
127 if (pcmk_is_set(node->flags, crm_remote_node)) {
128 return;
129 }
130
131 last = node->join;
132
133 if(phase == last) {
134 crm_trace("Node %s join-%d phase is still %s "
135 CRM_XS " nodeid=%u source=%s",
136 node->uname, current_join_id, crm_join_phase_str(last),
137 node->id, source);
138
139 } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
140 node->join = phase;
141 crm_trace("Node %s join-%d phase is now %s (was %s) "
142 CRM_XS " nodeid=%u source=%s",
143 node->uname, current_join_id, crm_join_phase_str(phase),
144 crm_join_phase_str(last), node->id, source);
145
146 } else {
147 crm_warn("Rejecting join-%d phase update for node %s because "
148 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
149 current_join_id, node->uname, crm_join_phase_str(last),
150 crm_join_phase_str(phase), node->id, source);
151 }
152 }
153
154 static void
155 start_join_round(void)
156 {
157 GHashTableIter iter;
158 crm_node_t *peer = NULL;
159
160 crm_debug("Starting new join round join-%d", current_join_id);
161
162 g_hash_table_iter_init(&iter, crm_peer_cache);
163 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
164 crm_update_peer_join(__func__, peer, crm_join_none);
165 }
166 if (max_generation_from != NULL) {
167 free(max_generation_from);
168 max_generation_from = NULL;
169 }
170 if (max_generation_xml != NULL) {
171 free_xml(max_generation_xml);
172 max_generation_xml = NULL;
173 }
174 controld_clear_fsa_input_flags(R_HAVE_CIB);
175 controld_forget_all_cib_replace_calls();
176 }
177
178
179
180
181
182
183
184
185 static xmlNode *
186 create_dc_message(const char *join_op, const char *host_to)
187 {
188 xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
189 CRM_SYSTEM_DC, NULL);
190
191
192 crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
193
194
195
196
197 pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING,
198 pcmk_is_set(controld_globals.fsa_input_register,
199 R_SHUTDOWN));
200 return msg;
201 }
202
203 static void
204 join_make_offer(gpointer key, gpointer value, gpointer user_data)
205 {
206 xmlNode *offer = NULL;
207 crm_node_t *member = (crm_node_t *)value;
208
209 CRM_ASSERT(member != NULL);
210 if (crm_is_peer_active(member) == FALSE) {
211 crm_info("Not making join-%d offer to inactive node %s",
212 current_join_id,
213 (member->uname? member->uname : "with unknown name"));
214 if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
215
216
217
218
219
220
221
222
223
224 pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
225 }
226 return;
227 }
228
229 if (member->uname == NULL) {
230 crm_info("Not making join-%d offer to node uuid %s with unknown name",
231 current_join_id, member->uuid);
232 return;
233 }
234
235 if (controld_globals.membership_id != crm_peer_seq) {
236 controld_globals.membership_id = crm_peer_seq;
237 crm_info("Making join-%d offers based on membership event %llu",
238 current_join_id, crm_peer_seq);
239 }
240
241 if(user_data && member->join > crm_join_none) {
242 crm_info("Not making join-%d offer to already known node %s (%s)",
243 current_join_id, member->uname,
244 crm_join_phase_str(member->join));
245 return;
246 }
247
248 crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
249
250 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
251
252
253 crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
254
255 crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
256 send_cluster_message(member, crm_msg_crmd, offer, TRUE);
257 free_xml(offer);
258
259 crm_update_peer_join(__func__, member, crm_join_welcomed);
260 }
261
262
263 void
264 do_dc_join_offer_all(long long action,
265 enum crmd_fsa_cause cause,
266 enum crmd_fsa_state cur_state,
267 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
268 {
269 int count;
270
271
272
273
274
275 current_join_id++;
276 start_join_round();
277
278 update_dc(NULL);
279 if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
280 crm_info("A new node joined the cluster");
281 }
282 g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
283
284 count = crmd_join_phase_count(crm_join_welcomed);
285 crm_info("Waiting on join-%d requests from %d outstanding node%s",
286 current_join_id, count, pcmk__plural_s(count));
287
288
289 }
290
291
292 void
293 do_dc_join_offer_one(long long action,
294 enum crmd_fsa_cause cause,
295 enum crmd_fsa_state cur_state,
296 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
297 {
298 crm_node_t *member;
299 ha_msg_input_t *welcome = NULL;
300 int count;
301 const char *join_to = NULL;
302
303 if (msg_data->data == NULL) {
304 crm_info("Making join-%d offers to any unconfirmed nodes "
305 "because an unknown node joined", current_join_id);
306 g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
307 check_join_state(cur_state, __func__);
308 return;
309 }
310
311 welcome = fsa_typed_data(fsa_dt_ha_msg);
312 if (welcome == NULL) {
313
314 return;
315 }
316
317 join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
318 if (join_to == NULL) {
319 crm_err("Can't make join-%d offer to unknown node", current_join_id);
320 return;
321 }
322 member = crm_get_peer(0, join_to);
323
324
325
326
327
328
329 crm_update_peer_join(__func__, member, crm_join_none);
330 join_make_offer(NULL, member, NULL);
331
332
333
334
335 if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
336 member = crm_get_peer(0, controld_globals.our_nodename);
337 join_make_offer(NULL, member, NULL);
338 }
339
340
341
342
343 abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL);
344
345 count = crmd_join_phase_count(crm_join_welcomed);
346 crm_info("Waiting on join-%d requests from %d outstanding node%s",
347 current_join_id, count, pcmk__plural_s(count));
348
349
350 }
351
352 static int
353 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
354 {
355 const char *elem_l = crm_element_value(left, field);
356 const char *elem_r = crm_element_value(right, field);
357
358 long long int_elem_l;
359 long long int_elem_r;
360
361 pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
362 pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
363
364 if (int_elem_l < int_elem_r) {
365 return -1;
366
367 } else if (int_elem_l > int_elem_r) {
368 return 1;
369 }
370
371 return 0;
372 }
373
374
375 void
376 do_dc_join_filter_offer(long long action,
377 enum crmd_fsa_cause cause,
378 enum crmd_fsa_state cur_state,
379 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
380 {
381 xmlNode *generation = NULL;
382
383 int cmp = 0;
384 int join_id = -1;
385 int count = 0;
386 gint value = 0;
387 gboolean ack_nack_bool = TRUE;
388 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
389
390 const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
391 const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
392 const char *join_version = crm_element_value(join_ack->msg,
393 XML_ATTR_CRM_VERSION);
394 crm_node_t *join_node = NULL;
395
396 if (join_from == NULL) {
397 crm_err("Ignoring invalid join request without node name");
398 return;
399 }
400 join_node = crm_get_peer(0, join_from);
401
402 crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
403 if (join_id != current_join_id) {
404 crm_debug("Ignoring join-%d request from %s because we are on join-%d",
405 join_id, join_from, current_join_id);
406 check_join_state(cur_state, __func__);
407 return;
408 }
409
410 generation = join_ack->xml;
411 if (max_generation_xml != NULL && generation != NULL) {
412 int lpc = 0;
413
414 const char *attributes[] = {
415 XML_ATTR_GENERATION_ADMIN,
416 XML_ATTR_GENERATION,
417 XML_ATTR_NUMUPDATES,
418 };
419
420 for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
421 cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
422 }
423 }
424
425 if (ref == NULL) {
426 ref = "none";
427 }
428
429 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
430 crm_err("Rejecting join-%d request from node %s because we failed to "
431 "sync its CIB in join-%d " CRM_XS " ref=%s",
432 join_id, join_from, value, ref);
433 ack_nack_bool = FALSE;
434
435 } else if (!crm_is_peer_active(join_node)) {
436 if (match_down_event(join_from) != NULL) {
437
438
439
440
441
442
443 crm_debug("Rejecting join-%d request from inactive node %s "
444 CRM_XS " ref=%s", join_id, join_from, ref);
445 } else {
446 crm_err("Rejecting join-%d request from inactive node %s "
447 CRM_XS " ref=%s", join_id, join_from, ref);
448 }
449 ack_nack_bool = FALSE;
450
451 } else if (generation == NULL) {
452 crm_err("Rejecting invalid join-%d request from node %s "
453 "missing CIB generation " CRM_XS " ref=%s",
454 join_id, join_from, ref);
455 ack_nack_bool = FALSE;
456
457 } else if ((join_version == NULL)
458 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
459 crm_err("Rejecting join-%d request from node %s because feature set %s"
460 " is incompatible with ours (%s) " CRM_XS " ref=%s",
461 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
462 CRM_FEATURE_SET, ref);
463 ack_nack_bool = FALSE;
464
465 } else if (max_generation_xml == NULL) {
466 const char *validation = crm_element_value(generation,
467 XML_ATTR_VALIDATION);
468
469 if (get_schema_version(validation) < 0) {
470 crm_err("Rejecting join-%d request from %s (with first CIB "
471 "generation) due to unknown schema version %s "
472 CRM_XS " ref=%s",
473 join_id, join_from, validation, ref);
474 ack_nack_bool = FALSE;
475
476 } else {
477 crm_debug("Accepting join-%d request from %s (with first CIB "
478 "generation) " CRM_XS " ref=%s",
479 join_id, join_from, ref);
480 max_generation_xml = copy_xml(generation);
481 pcmk__str_update(&max_generation_from, join_from);
482 }
483
484 } else if ((cmp < 0)
485 || ((cmp == 0)
486 && pcmk__str_eq(join_from, controld_globals.our_nodename,
487 pcmk__str_casei))) {
488 const char *validation = crm_element_value(generation,
489 XML_ATTR_VALIDATION);
490
491 if (get_schema_version(validation) < 0) {
492 crm_err("Rejecting join-%d request from %s (with better CIB "
493 "generation than current best from %s) due to unknown "
494 "schema version %s " CRM_XS " ref=%s",
495 join_id, join_from, max_generation_from, validation, ref);
496 ack_nack_bool = FALSE;
497
498 } else {
499 crm_debug("Accepting join-%d request from %s (with better CIB "
500 "generation than current best from %s) " CRM_XS " ref=%s",
501 join_id, join_from, max_generation_from, ref);
502 crm_log_xml_debug(max_generation_xml, "Old max generation");
503 crm_log_xml_debug(generation, "New max generation");
504
505 free_xml(max_generation_xml);
506 max_generation_xml = copy_xml(join_ack->xml);
507 pcmk__str_update(&max_generation_from, join_from);
508 }
509
510 } else {
511 crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
512 join_id, join_from, ref);
513 }
514
515 if (!ack_nack_bool) {
516 if (compare_version(join_version, "3.17.0") < 0) {
517
518
519
520 crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
521 } else {
522 crm_update_peer_join(__func__, join_node, crm_join_nack);
523 }
524 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
525
526 } else {
527 crm_update_peer_join(__func__, join_node, crm_join_integrated);
528 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
529 }
530
531 count = crmd_join_phase_count(crm_join_integrated);
532 crm_debug("%d node%s currently integrated in join-%d",
533 count, pcmk__plural_s(count), join_id);
534
535 if (check_join_state(cur_state, __func__) == FALSE) {
536
537 count = crmd_join_phase_count(crm_join_welcomed);
538 crm_debug("Waiting on join-%d requests from %d outstanding node%s",
539 join_id, count, pcmk__plural_s(count));
540 }
541 }
542
543
544 void
545 do_dc_join_finalize(long long action,
546 enum crmd_fsa_cause cause,
547 enum crmd_fsa_state cur_state,
548 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
549 {
550 char *sync_from = NULL;
551 int rc = pcmk_ok;
552 int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
553 int count_finalizable = crmd_join_phase_count(crm_join_integrated)
554 + crmd_join_phase_count(crm_join_nack)
555 + crmd_join_phase_count(crm_join_nack_quiet);
556
557
558
559
560 if (count_welcomed != 0) {
561 crm_debug("Waiting on join-%d requests from %d outstanding node%s "
562 "before finalizing join", current_join_id, count_welcomed,
563 pcmk__plural_s(count_welcomed));
564 crmd_join_phase_log(LOG_DEBUG);
565
566 return;
567
568 } else if (count_finalizable == 0) {
569 crm_debug("Finalization not needed for join-%d at the current time",
570 current_join_id);
571 crmd_join_phase_log(LOG_DEBUG);
572 check_join_state(controld_globals.fsa_state, __func__);
573 return;
574 }
575
576 controld_clear_fsa_input_flags(R_HAVE_CIB);
577 if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
578 pcmk__str_null_matches|pcmk__str_casei)) {
579 controld_set_fsa_input_flags(R_HAVE_CIB);
580 }
581
582 if (!controld_globals.transition_graph->complete) {
583 crm_warn("Delaying join-%d finalization while transition in progress",
584 current_join_id);
585 crmd_join_phase_log(LOG_DEBUG);
586 crmd_fsa_stall(FALSE);
587 return;
588 }
589
590 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
591
592 pcmk__str_update(&sync_from, controld_globals.our_nodename);
593 crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
594 current_join_id, count_finalizable,
595 pcmk__plural_s(count_finalizable));
596 crm_log_xml_debug(max_generation_xml, "Requested CIB version");
597
598 } else {
599
600 pcmk__str_update(&sync_from, max_generation_from);
601 crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
602 current_join_id, count_finalizable,
603 pcmk__plural_s(count_finalizable), sync_from);
604 crm_log_xml_notice(max_generation_xml, "Requested CIB version");
605 }
606 crmd_join_phase_log(LOG_DEBUG);
607
608 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
609 sync_from, NULL, cib_none);
610
611 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
612 controld_record_cib_replace_call(rc);
613 }
614 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
615 }
616
617 void
618 free_max_generation(void)
619 {
620 free(max_generation_from);
621 max_generation_from = NULL;
622
623 free_xml(max_generation_xml);
624 max_generation_xml = NULL;
625 }
626
627 void
628 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
629 {
630 CRM_LOG_ASSERT(-EPERM != rc);
631
632 controld_forget_cib_replace_call(call_id);
633
634 if (rc != pcmk_ok) {
635 const char *sync_from = (const char *) user_data;
636
637 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
638 "Could not sync CIB from %s in join-%d: %s",
639 sync_from, current_join_id, pcmk_strerror(rc));
640
641 if (rc != -pcmk_err_old_data) {
642 record_failed_sync_node(sync_from, current_join_id);
643 }
644
645
646 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
647 __func__);
648
649 } else if (!AM_I_DC) {
650 crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
651
652 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
653 crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
654 "(%s)", current_join_id,
655 fsa_state2string(controld_globals.fsa_state));
656
657 } else {
658 controld_set_fsa_input_flags(R_HAVE_CIB);
659
660
661 if (!check_join_state(controld_globals.fsa_state, __func__)) {
662 int count_finalizable = 0;
663
664 count_finalizable = crmd_join_phase_count(crm_join_integrated)
665 + crmd_join_phase_count(crm_join_nack)
666 + crmd_join_phase_count(crm_join_nack_quiet);
667
668 crm_debug("Notifying %d node%s of join-%d results",
669 count_finalizable, pcmk__plural_s(count_finalizable),
670 current_join_id);
671 g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
672 }
673 }
674 }
675
676 static void
677 join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
678 {
679 fsa_data_t *msg_data = NULL;
680
681 if (rc == pcmk_ok) {
682 crm_debug("join-%d node history update (via CIB call %d) complete",
683 current_join_id, call_id);
684 check_join_state(controld_globals.fsa_state, __func__);
685
686 } else {
687 crm_err("join-%d node history update (via CIB call %d) failed: %s "
688 "(next transition may determine resource status incorrectly)",
689 current_join_id, call_id, pcmk_strerror(rc));
690 crm_log_xml_debug(msg, "failed");
691 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
692 }
693 }
694
695
696 void
697 do_dc_join_ack(long long action,
698 enum crmd_fsa_cause cause,
699 enum crmd_fsa_state cur_state,
700 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
701 {
702 int join_id = -1;
703 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
704 enum controld_section_e section = controld_section_lrm;
705 const int cib_opts = cib_scope_local|cib_can_create;
706
707 const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
708 const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
709 crm_node_t *peer = NULL;
710
711
712 if (join_from == NULL) {
713 crm_warn("Ignoring message received without node identification");
714 return;
715 }
716 if (op == NULL) {
717 crm_warn("Ignoring message received from %s without task", join_from);
718 return;
719 }
720
721 if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
722 crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
723 op, join_from, CRM_OP_JOIN_CONFIRM);
724 return;
725 }
726
727 if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
728 crm_warn("Ignoring join confirmation from %s without valid join ID",
729 join_from);
730 return;
731 }
732
733 peer = crm_get_peer(0, join_from);
734 if (peer->join != crm_join_finalized) {
735 crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
736 "(currently %s not %s)",
737 join_id, join_from, crm_join_phase_str(peer->join),
738 crm_join_phase_str(crm_join_finalized));
739 return;
740 }
741
742 if (join_id != current_join_id) {
743 crm_err("Rejecting join-%d confirmation from %s "
744 "because currently on join-%d",
745 join_id, join_from, current_join_id);
746 crm_update_peer_join(__func__, peer, crm_join_nack);
747 return;
748 }
749
750 crm_update_peer_join(__func__, peer, crm_join_confirmed);
751
752
753
754
755 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
756 section = controld_section_lrm_unlocked;
757 }
758 controld_delete_node_state(join_from, section, cib_scope_local);
759 if (pcmk__str_eq(join_from, controld_globals.our_nodename,
760 pcmk__str_casei)) {
761 xmlNode *now_dc_lrmd_state = controld_query_executor_state();
762
763 if (now_dc_lrmd_state != NULL) {
764 crm_debug("Updating local node history for join-%d "
765 "from query result", join_id);
766 controld_update_cib(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_opts,
767 join_update_complete_callback);
768 free_xml(now_dc_lrmd_state);
769 } else {
770 crm_warn("Updating local node history from join-%d confirmation "
771 "because query failed", join_id);
772 controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
773 join_update_complete_callback);
774 }
775 } else {
776 crm_debug("Updating node history for %s from join-%d confirmation",
777 join_from, join_id);
778 controld_update_cib(XML_CIB_TAG_STATUS, join_ack->xml, cib_opts,
779 join_update_complete_callback);
780 }
781 }
782
783 void
784 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
785 {
786 xmlNode *acknak = NULL;
787 xmlNode *tmp1 = NULL;
788 crm_node_t *join_node = value;
789 const char *join_to = join_node->uname;
790 bool integrated = false;
791
792 switch (join_node->join) {
793 case crm_join_integrated:
794 integrated = true;
795 break;
796 case crm_join_nack:
797 case crm_join_nack_quiet:
798 break;
799 default:
800 crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
801 "for join-%d", join_to,
802 crm_join_phase_str(join_node->join), current_join_id);
803 return;
804 }
805
806
807
808
809 crm_trace("Updating node name and UUID in CIB for %s", join_to);
810 tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
811 set_uuid(tmp1, XML_ATTR_ID, join_node);
812 crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
813 fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
814 free_xml(tmp1);
815
816 if (join_node->join == crm_join_nack_quiet) {
817 crm_trace("Not sending nack message to node %s with feature set older "
818 "than 3.17.0", join_to);
819 return;
820 }
821
822 join_node = crm_get_peer(0, join_to);
823 if (!crm_is_peer_active(join_node)) {
824
825
826
827
828
829
830
831
832
833 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
834 return;
835 }
836
837
838 crm_debug("%sing join-%d request from %s",
839 integrated? "Acknowledg" : "Nack", current_join_id, join_to);
840 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
841 pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
842
843 if (integrated) {
844
845 crm_update_peer_join(__func__, join_node, crm_join_finalized);
846 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
847
848
849
850
851
852 if (crm_remote_peer_cache_size() != 0) {
853 GHashTableIter iter;
854 crm_node_t *node = NULL;
855 xmlNode *remotes = create_xml_node(acknak, XML_CIB_TAG_NODES);
856
857 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
858 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
859 xmlNode *remote = NULL;
860
861 if (!node->conn_host) {
862 continue;
863 }
864
865 remote = create_xml_node(remotes, XML_CIB_TAG_NODE);
866 pcmk__xe_set_props(remote,
867 XML_ATTR_ID, node->uname,
868 XML_CIB_TAG_STATE, node->state,
869 PCMK__XA_CONN_HOST, node->conn_host,
870 NULL);
871 }
872 }
873 }
874 send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE);
875 free_xml(acknak);
876 return;
877 }
878
879 gboolean
880 check_join_state(enum crmd_fsa_state cur_state, const char *source)
881 {
882 static unsigned long long highest_seq = 0;
883
884 if (controld_globals.membership_id != crm_peer_seq) {
885 crm_debug("join-%d: Membership changed from %llu to %llu "
886 CRM_XS " highest=%llu state=%s for=%s",
887 current_join_id, controld_globals.membership_id, crm_peer_seq,
888 highest_seq, fsa_state2string(cur_state), source);
889 if(highest_seq < crm_peer_seq) {
890
891 highest_seq = crm_peer_seq;
892 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
893 }
894
895 } else if (cur_state == S_INTEGRATION) {
896 if (crmd_join_phase_count(crm_join_welcomed) == 0) {
897 int count = crmd_join_phase_count(crm_join_integrated);
898
899 crm_debug("join-%d: Integration of %d peer%s complete "
900 CRM_XS " state=%s for=%s",
901 current_join_id, count, pcmk__plural_s(count),
902 fsa_state2string(cur_state), source);
903 register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
904 return TRUE;
905 }
906
907 } else if (cur_state == S_FINALIZE_JOIN) {
908 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
909 crm_debug("join-%d: Delaying finalization until we have CIB "
910 CRM_XS " state=%s for=%s",
911 current_join_id, fsa_state2string(cur_state), source);
912 return TRUE;
913
914 } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
915 int count = crmd_join_phase_count(crm_join_welcomed);
916
917 crm_debug("join-%d: Still waiting on %d welcomed node%s "
918 CRM_XS " state=%s for=%s",
919 current_join_id, count, pcmk__plural_s(count),
920 fsa_state2string(cur_state), source);
921 crmd_join_phase_log(LOG_DEBUG);
922
923 } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
924 int count = crmd_join_phase_count(crm_join_integrated);
925
926 crm_debug("join-%d: Still waiting on %d integrated node%s "
927 CRM_XS " state=%s for=%s",
928 current_join_id, count, pcmk__plural_s(count),
929 fsa_state2string(cur_state), source);
930 crmd_join_phase_log(LOG_DEBUG);
931
932 } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
933 int count = crmd_join_phase_count(crm_join_finalized);
934
935 crm_debug("join-%d: Still waiting on %d finalized node%s "
936 CRM_XS " state=%s for=%s",
937 current_join_id, count, pcmk__plural_s(count),
938 fsa_state2string(cur_state), source);
939 crmd_join_phase_log(LOG_DEBUG);
940
941 } else {
942 crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
943 current_join_id, fsa_state2string(cur_state), source);
944 register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
945 return TRUE;
946 }
947 }
948
949 return FALSE;
950 }
951
952 void
953 do_dc_join_final(long long action,
954 enum crmd_fsa_cause cause,
955 enum crmd_fsa_state cur_state,
956 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
957 {
958 crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
959 crm_update_quorum(crm_have_quorum, TRUE);
960 }
961
962 int crmd_join_phase_count(enum crm_join_phase phase)
963 {
964 int count = 0;
965 crm_node_t *peer;
966 GHashTableIter iter;
967
968 g_hash_table_iter_init(&iter, crm_peer_cache);
969 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
970 if(peer->join == phase) {
971 count++;
972 }
973 }
974 return count;
975 }
976
977 void crmd_join_phase_log(int level)
978 {
979 crm_node_t *peer;
980 GHashTableIter iter;
981
982 g_hash_table_iter_init(&iter, crm_peer_cache);
983 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
984 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
985 crm_join_phase_str(peer->join));
986 }
987 }