This source file includes the following definitions.
- controld_destroy_failed_sync_table
- controld_remove_failed_sync_node
- record_failed_sync_node
- lookup_failed_sync_node
- crm_update_peer_join
- start_join_round
- create_dc_message
- join_make_offer
- do_dc_join_offer_all
- do_dc_join_offer_one
- compare_int_fields
- do_dc_join_filter_offer
- do_dc_join_finalize
- free_max_generation
- finalize_sync_callback
- join_node_state_commit_callback
- do_dc_join_ack
- finalize_join_for
- check_join_state
- do_dc_join_final
- crmd_join_phase_count
- crmd_join_phase_log
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <crm/crm.h>
13
14 #include <crm/msg_xml.h>
15 #include <crm/common/xml.h>
16 #include <crm/cluster.h>
17
18 #include <pacemaker-controld.h>
19
/* Name of the node whose join request supplied max_generation_xml. Both
 * variables are set together in do_dc_join_filter_offer() and released
 * together in start_join_round() and free_max_generation().
 */
20 static char *max_generation_from = NULL;
/* Copy of the CIB generation XML taken from the best join request accepted so
 * far in the current round (see do_dc_join_filter_offer())
 */
21 static xmlNodePtr max_generation_xml = NULL;
22
23
24
25
26
27
28
29
30
31
32
/* Table mapping a node name to the ID of the join round in which syncing the
 * CIB from that node failed. It is created on first use by
 * record_failed_sync_node() and stays NULL until then.
 */
33 static GHashTable *failed_sync_nodes = NULL;
34
/* Forward declarations of functions that are defined further down in this
 * file but referenced before their definitions
 */
35 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
36 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
37 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
38
39
40
41
/* ID of the current join round. do_dc_join_offer_all() increments it at the
 * start of every round, and create_dc_message() stamps it on each DC message.
 */
42 static int current_join_id = 0;
43
44
45
46
47
48 void
49 controld_destroy_failed_sync_table(void)
50 {
51 if (failed_sync_nodes != NULL) {
52 g_hash_table_destroy(failed_sync_nodes);
53 failed_sync_nodes = NULL;
54 }
55 }
56
57
58
59
60
61
62
63 void
64 controld_remove_failed_sync_node(const char *node_name)
65 {
66 if (failed_sync_nodes != NULL) {
67 g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
68 }
69 }
70
71
72
73
74
75
76
77
78 static void
79 record_failed_sync_node(const char *node_name, gint join_id)
80 {
81 if (failed_sync_nodes == NULL) {
82 failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
83 }
84
85
86
87
88 CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
89 GINT_TO_POINTER(join_id)));
90 }
91
92
93
94
95
96
97
98
99
100
101
102
103 static int
104 lookup_failed_sync_node(const char *node_name, gint *join_id)
105 {
106 *join_id = -1;
107
108 if (failed_sync_nodes != NULL) {
109 gpointer result = g_hash_table_lookup(failed_sync_nodes,
110 (gchar *) node_name);
111 if (result != NULL) {
112 *join_id = GPOINTER_TO_INT(result);
113 return pcmk_rc_ok;
114 }
115 }
116 return pcmk_rc_node_unknown;
117 }
118
119 void
120 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
121 {
122 enum crm_join_phase last = 0;
123
124 CRM_CHECK(node != NULL, return);
125
126
127 if (pcmk_is_set(node->flags, crm_remote_node)) {
128 return;
129 }
130
131 last = node->join;
132
133 if(phase == last) {
134 crm_trace("Node %s join-%d phase is still %s "
135 CRM_XS " nodeid=%u source=%s",
136 node->uname, current_join_id, crm_join_phase_str(last),
137 node->id, source);
138
139 } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
140 node->join = phase;
141 crm_trace("Node %s join-%d phase is now %s (was %s) "
142 CRM_XS " nodeid=%u source=%s",
143 node->uname, current_join_id, crm_join_phase_str(phase),
144 crm_join_phase_str(last), node->id, source);
145
146 } else {
147 crm_warn("Rejecting join-%d phase update for node %s because "
148 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
149 current_join_id, node->uname, crm_join_phase_str(last),
150 crm_join_phase_str(phase), node->id, source);
151 }
152 }
153
154 static void
155 start_join_round(void)
156 {
157 GHashTableIter iter;
158 crm_node_t *peer = NULL;
159
160 crm_debug("Starting new join round join-%d", current_join_id);
161
162 g_hash_table_iter_init(&iter, crm_peer_cache);
163 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
164 crm_update_peer_join(__func__, peer, crm_join_none);
165 }
166 if (max_generation_from != NULL) {
167 free(max_generation_from);
168 max_generation_from = NULL;
169 }
170 if (max_generation_xml != NULL) {
171 free_xml(max_generation_xml);
172 max_generation_xml = NULL;
173 }
174 controld_clear_fsa_input_flags(R_HAVE_CIB);
175 }
176
177
178
179
180
181
182
183
184 static xmlNode *
185 create_dc_message(const char *join_op, const char *host_to)
186 {
187 xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
188 CRM_SYSTEM_DC, NULL);
189
190
191 crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
192
193
194
195
196 pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING,
197 pcmk_is_set(controld_globals.fsa_input_register,
198 R_SHUTDOWN));
199 return msg;
200 }
201
202 static void
203 join_make_offer(gpointer key, gpointer value, gpointer user_data)
204 {
205 xmlNode *offer = NULL;
206 crm_node_t *member = (crm_node_t *)value;
207
208 CRM_ASSERT(member != NULL);
209 if (crm_is_peer_active(member) == FALSE) {
210 crm_info("Not making join-%d offer to inactive node %s",
211 current_join_id,
212 (member->uname? member->uname : "with unknown name"));
213 if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
214
215
216
217
218
219
220
221
222
223 pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
224 }
225 return;
226 }
227
228 if (member->uname == NULL) {
229 crm_info("Not making join-%d offer to node uuid %s with unknown name",
230 current_join_id, member->uuid);
231 return;
232 }
233
234 if (controld_globals.membership_id != crm_peer_seq) {
235 controld_globals.membership_id = crm_peer_seq;
236 crm_info("Making join-%d offers based on membership event %llu",
237 current_join_id, crm_peer_seq);
238 }
239
240 if(user_data && member->join > crm_join_none) {
241 crm_info("Not making join-%d offer to already known node %s (%s)",
242 current_join_id, member->uname,
243 crm_join_phase_str(member->join));
244 return;
245 }
246
247 crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
248
249 offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
250
251
252 crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
253
254 crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
255 send_cluster_message(member, crm_msg_crmd, offer, TRUE);
256 free_xml(offer);
257
258 crm_update_peer_join(__func__, member, crm_join_welcomed);
259 }
260
261
262 void
263 do_dc_join_offer_all(long long action,
264 enum crmd_fsa_cause cause,
265 enum crmd_fsa_state cur_state,
266 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
267 {
268 int count;
269
270
271
272
273
274 current_join_id++;
275 start_join_round();
276
277 update_dc(NULL);
278 if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
279 crm_info("A new node joined the cluster");
280 }
281 g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
282
283 count = crmd_join_phase_count(crm_join_welcomed);
284 crm_info("Waiting on join-%d requests from %d outstanding node%s",
285 current_join_id, count, pcmk__plural_s(count));
286
287
288 }
289
290
291 void
292 do_dc_join_offer_one(long long action,
293 enum crmd_fsa_cause cause,
294 enum crmd_fsa_state cur_state,
295 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
296 {
297 crm_node_t *member;
298 ha_msg_input_t *welcome = NULL;
299 int count;
300 const char *join_to = NULL;
301
302 if (msg_data->data == NULL) {
303 crm_info("Making join-%d offers to any unconfirmed nodes "
304 "because an unknown node joined", current_join_id);
305 g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
306 check_join_state(cur_state, __func__);
307 return;
308 }
309
310 welcome = fsa_typed_data(fsa_dt_ha_msg);
311 if (welcome == NULL) {
312
313 return;
314 }
315
316 join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
317 if (join_to == NULL) {
318 crm_err("Can't make join-%d offer to unknown node", current_join_id);
319 return;
320 }
321 member = crm_get_peer(0, join_to);
322
323
324
325
326
327
328 crm_update_peer_join(__func__, member, crm_join_none);
329 join_make_offer(NULL, member, NULL);
330
331
332
333
334 if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
335 member = crm_get_peer(0, controld_globals.our_nodename);
336 join_make_offer(NULL, member, NULL);
337 }
338
339
340
341
342 abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL);
343
344 count = crmd_join_phase_count(crm_join_welcomed);
345 crm_info("Waiting on join-%d requests from %d outstanding node%s",
346 current_join_id, count, pcmk__plural_s(count));
347
348
349 }
350
351 static int
352 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
353 {
354 const char *elem_l = crm_element_value(left, field);
355 const char *elem_r = crm_element_value(right, field);
356
357 long long int_elem_l;
358 long long int_elem_r;
359
360 pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
361 pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
362
363 if (int_elem_l < int_elem_r) {
364 return -1;
365
366 } else if (int_elem_l > int_elem_r) {
367 return 1;
368 }
369
370 return 0;
371 }
372
373
374 void
375 do_dc_join_filter_offer(long long action,
376 enum crmd_fsa_cause cause,
377 enum crmd_fsa_state cur_state,
378 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
379 {
380 xmlNode *generation = NULL;
381
382 int cmp = 0;
383 int join_id = -1;
384 int count = 0;
385 gint value = 0;
386 gboolean ack_nack_bool = TRUE;
387 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
388
389 const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
390 const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
391 const char *join_version = crm_element_value(join_ack->msg,
392 XML_ATTR_CRM_VERSION);
393 crm_node_t *join_node = NULL;
394
395 if (join_from == NULL) {
396 crm_err("Ignoring invalid join request without node name");
397 return;
398 }
399 join_node = crm_get_peer(0, join_from);
400
401 crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
402 if (join_id != current_join_id) {
403 crm_debug("Ignoring join-%d request from %s because we are on join-%d",
404 join_id, join_from, current_join_id);
405 check_join_state(cur_state, __func__);
406 return;
407 }
408
409 generation = join_ack->xml;
410 if (max_generation_xml != NULL && generation != NULL) {
411 int lpc = 0;
412
413 const char *attributes[] = {
414 XML_ATTR_GENERATION_ADMIN,
415 XML_ATTR_GENERATION,
416 XML_ATTR_NUMUPDATES,
417 };
418
419 for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
420 cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
421 }
422 }
423
424 if (ref == NULL) {
425 ref = "none";
426 }
427
428 if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
429 crm_err("Rejecting join-%d request from node %s because we failed to "
430 "sync its CIB in join-%d " CRM_XS " ref=%s",
431 join_id, join_from, value, ref);
432 ack_nack_bool = FALSE;
433
434 } else if (!crm_is_peer_active(join_node)) {
435 if (match_down_event(join_from) != NULL) {
436
437
438
439
440
441
442 crm_debug("Rejecting join-%d request from inactive node %s "
443 CRM_XS " ref=%s", join_id, join_from, ref);
444 } else {
445 crm_err("Rejecting join-%d request from inactive node %s "
446 CRM_XS " ref=%s", join_id, join_from, ref);
447 }
448 ack_nack_bool = FALSE;
449
450 } else if (generation == NULL) {
451 crm_err("Rejecting invalid join-%d request from node %s "
452 "missing CIB generation " CRM_XS " ref=%s",
453 join_id, join_from, ref);
454 ack_nack_bool = FALSE;
455
456 } else if ((join_version == NULL)
457 || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
458 crm_err("Rejecting join-%d request from node %s because feature set %s"
459 " is incompatible with ours (%s) " CRM_XS " ref=%s",
460 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
461 CRM_FEATURE_SET, ref);
462 ack_nack_bool = FALSE;
463
464 } else if (max_generation_xml == NULL) {
465 const char *validation = crm_element_value(generation,
466 XML_ATTR_VALIDATION);
467
468 if (get_schema_version(validation) < 0) {
469 crm_err("Rejecting join-%d request from %s (with first CIB "
470 "generation) due to unknown schema version %s "
471 CRM_XS " ref=%s",
472 join_id, join_from, validation, ref);
473 ack_nack_bool = FALSE;
474
475 } else {
476 crm_debug("Accepting join-%d request from %s (with first CIB "
477 "generation) " CRM_XS " ref=%s",
478 join_id, join_from, ref);
479 max_generation_xml = copy_xml(generation);
480 pcmk__str_update(&max_generation_from, join_from);
481 }
482
483 } else if ((cmp < 0)
484 || ((cmp == 0)
485 && pcmk__str_eq(join_from, controld_globals.our_nodename,
486 pcmk__str_casei))) {
487 const char *validation = crm_element_value(generation,
488 XML_ATTR_VALIDATION);
489
490 if (get_schema_version(validation) < 0) {
491 crm_err("Rejecting join-%d request from %s (with better CIB "
492 "generation than current best from %s) due to unknown "
493 "schema version %s " CRM_XS " ref=%s",
494 join_id, join_from, max_generation_from, validation, ref);
495 ack_nack_bool = FALSE;
496
497 } else {
498 crm_debug("Accepting join-%d request from %s (with better CIB "
499 "generation than current best from %s) " CRM_XS " ref=%s",
500 join_id, join_from, max_generation_from, ref);
501 crm_log_xml_debug(max_generation_xml, "Old max generation");
502 crm_log_xml_debug(generation, "New max generation");
503
504 free_xml(max_generation_xml);
505 max_generation_xml = copy_xml(join_ack->xml);
506 pcmk__str_update(&max_generation_from, join_from);
507 }
508
509 } else {
510 crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
511 join_id, join_from, ref);
512 }
513
514 if (!ack_nack_bool) {
515 if (compare_version(join_version, "3.17.0") < 0) {
516
517
518
519 crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
520 } else {
521 crm_update_peer_join(__func__, join_node, crm_join_nack);
522 }
523 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
524
525 } else {
526 crm_update_peer_join(__func__, join_node, crm_join_integrated);
527 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
528 }
529
530 count = crmd_join_phase_count(crm_join_integrated);
531 crm_debug("%d node%s currently integrated in join-%d",
532 count, pcmk__plural_s(count), join_id);
533
534 if (check_join_state(cur_state, __func__) == FALSE) {
535
536 count = crmd_join_phase_count(crm_join_welcomed);
537 crm_debug("Waiting on join-%d requests from %d outstanding node%s",
538 join_id, count, pcmk__plural_s(count));
539 }
540 }
541
542
543 void
544 do_dc_join_finalize(long long action,
545 enum crmd_fsa_cause cause,
546 enum crmd_fsa_state cur_state,
547 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
548 {
549 char *sync_from = NULL;
550 int rc = pcmk_ok;
551 int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
552 int count_finalizable = crmd_join_phase_count(crm_join_integrated)
553 + crmd_join_phase_count(crm_join_nack)
554 + crmd_join_phase_count(crm_join_nack_quiet);
555
556
557
558
559 if (count_welcomed != 0) {
560 crm_debug("Waiting on join-%d requests from %d outstanding node%s "
561 "before finalizing join", current_join_id, count_welcomed,
562 pcmk__plural_s(count_welcomed));
563 crmd_join_phase_log(LOG_DEBUG);
564
565 return;
566
567 } else if (count_finalizable == 0) {
568 crm_debug("Finalization not needed for join-%d at the current time",
569 current_join_id);
570 crmd_join_phase_log(LOG_DEBUG);
571 check_join_state(controld_globals.fsa_state, __func__);
572 return;
573 }
574
575 controld_clear_fsa_input_flags(R_HAVE_CIB);
576 if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
577 pcmk__str_null_matches|pcmk__str_casei)) {
578 controld_set_fsa_input_flags(R_HAVE_CIB);
579 }
580
581 if (!controld_globals.transition_graph->complete) {
582 crm_warn("Delaying join-%d finalization while transition in progress",
583 current_join_id);
584 crmd_join_phase_log(LOG_DEBUG);
585 crmd_fsa_stall(FALSE);
586 return;
587 }
588
589 if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
590
591 pcmk__str_update(&sync_from, controld_globals.our_nodename);
592 crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
593 current_join_id, count_finalizable,
594 pcmk__plural_s(count_finalizable));
595 crm_log_xml_debug(max_generation_xml, "Requested CIB version");
596
597 } else {
598
599 pcmk__str_update(&sync_from, max_generation_from);
600 crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
601 current_join_id, count_finalizable,
602 pcmk__plural_s(count_finalizable), sync_from);
603 crm_log_xml_notice(max_generation_xml, "Requested CIB version");
604 }
605 crmd_join_phase_log(LOG_DEBUG);
606
607 rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
608 sync_from, NULL, cib_none);
609 fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
610 }
611
612 void
613 free_max_generation(void)
614 {
615 free(max_generation_from);
616 max_generation_from = NULL;
617
618 free_xml(max_generation_xml);
619 max_generation_xml = NULL;
620 }
621
622 void
623 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
624 {
625 CRM_LOG_ASSERT(-EPERM != rc);
626
627 if (rc != pcmk_ok) {
628 const char *sync_from = (const char *) user_data;
629
630 do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
631 "Could not sync CIB from %s in join-%d: %s",
632 sync_from, current_join_id, pcmk_strerror(rc));
633
634 if (rc != -pcmk_err_old_data) {
635 record_failed_sync_node(sync_from, current_join_id);
636 }
637
638
639 register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
640 __func__);
641
642 } else if (!AM_I_DC) {
643 crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
644
645 } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
646 crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
647 "(%s)", current_join_id,
648 fsa_state2string(controld_globals.fsa_state));
649
650 } else {
651 controld_set_fsa_input_flags(R_HAVE_CIB);
652
653
654 if (!check_join_state(controld_globals.fsa_state, __func__)) {
655 int count_finalizable = 0;
656
657 count_finalizable = crmd_join_phase_count(crm_join_integrated)
658 + crmd_join_phase_count(crm_join_nack)
659 + crmd_join_phase_count(crm_join_nack_quiet);
660
661 crm_debug("Notifying %d node%s of join-%d results",
662 count_finalizable, pcmk__plural_s(count_finalizable),
663 current_join_id);
664 g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
665 }
666 }
667 }
668
669 static void
670 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
671 xmlNode *output, void *user_data)
672 {
673 const char *node = user_data;
674
675 if (rc != pcmk_ok) {
676 fsa_data_t *msg_data = NULL;
677
678 crm_crit("join-%d node history update (via CIB call %d) for node %s "
679 "failed: %s",
680 current_join_id, call_id, node, pcmk_strerror(rc));
681 crm_log_xml_debug(msg, "failed");
682 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
683 }
684
685 crm_debug("join-%d node history update (via CIB call %d) for node %s "
686 "complete",
687 current_join_id, call_id, node);
688 check_join_state(controld_globals.fsa_state, __func__);
689 }
690
691
692 void
693 do_dc_join_ack(long long action,
694 enum crmd_fsa_cause cause,
695 enum crmd_fsa_state cur_state,
696 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
697 {
698 int join_id = -1;
699 ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
700
701 const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
702 char *join_from = crm_element_value_copy(join_ack->msg, F_CRM_HOST_FROM);
703 crm_node_t *peer = NULL;
704
705 enum controld_section_e section = controld_section_lrm;
706 char *xpath = NULL;
707 xmlNode *state = join_ack->xml;
708 xmlNode *execd_state = NULL;
709
710 cib_t *cib = controld_globals.cib_conn;
711 int rc = pcmk_ok;
712
713
714 if (join_from == NULL) {
715 crm_warn("Ignoring message received without node identification");
716 goto done;
717 }
718 if (op == NULL) {
719 crm_warn("Ignoring message received from %s without task", join_from);
720 goto done;
721 }
722
723 if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
724 crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
725 op, join_from, CRM_OP_JOIN_CONFIRM);
726 goto done;
727 }
728
729 if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
730 crm_warn("Ignoring join confirmation from %s without valid join ID",
731 join_from);
732 goto done;
733 }
734
735 peer = crm_get_peer(0, join_from);
736 if (peer->join != crm_join_finalized) {
737 crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
738 "(currently %s not %s)",
739 join_id, join_from, crm_join_phase_str(peer->join),
740 crm_join_phase_str(crm_join_finalized));
741 goto done;
742 }
743
744 if (join_id != current_join_id) {
745 crm_err("Rejecting join-%d confirmation from %s "
746 "because currently on join-%d",
747 join_id, join_from, current_join_id);
748 crm_update_peer_join(__func__, peer, crm_join_nack);
749 goto done;
750 }
751
752 crm_update_peer_join(__func__, peer, crm_join_confirmed);
753
754
755
756
757
758
759 rc = cib->cmds->init_transaction(cib);
760 if (rc != pcmk_ok) {
761 goto done;
762 }
763
764
765 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
766 section = controld_section_lrm_unlocked;
767 }
768 controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
769
770 rc = cib->cmds->remove(cib, xpath, NULL,
771 cib_scope_local
772 |cib_xpath
773 |cib_multiple
774 |cib_transaction);
775 if (rc != pcmk_ok) {
776 goto done;
777 }
778
779
780 if (pcmk__str_eq(join_from, controld_globals.our_nodename,
781 pcmk__str_casei)) {
782
783
784 execd_state = controld_query_executor_state();
785
786 if (execd_state != NULL) {
787 crm_debug("Updating local node history for join-%d from query "
788 "result",
789 current_join_id);
790 state = execd_state;
791
792 } else {
793 crm_warn("Updating local node history from join-%d confirmation "
794 "because query failed",
795 current_join_id);
796 }
797
798 } else {
799 crm_debug("Updating node history for %s from join-%d confirmation",
800 join_from, current_join_id);
801 }
802
803 rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, state,
804 cib_scope_local|cib_can_create|cib_transaction);
805 free_xml(execd_state);
806 if (rc != pcmk_ok) {
807 goto done;
808 }
809
810
811 rc = cib->cmds->end_transaction(cib, true, cib_scope_local);
812 fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
813
814 if (rc > 0) {
815
816 join_from = NULL;
817 rc = pcmk_ok;
818 }
819
820 done:
821 if (rc != pcmk_ok) {
822 crm_crit("join-%d node history update for node %s failed: %s",
823 current_join_id, join_from, pcmk_strerror(rc));
824 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
825 }
826 free(join_from);
827 free(xpath);
828 }
829
830 void
831 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
832 {
833 xmlNode *acknak = NULL;
834 xmlNode *tmp1 = NULL;
835 crm_node_t *join_node = value;
836 const char *join_to = join_node->uname;
837 bool integrated = false;
838
839 switch (join_node->join) {
840 case crm_join_integrated:
841 integrated = true;
842 break;
843 case crm_join_nack:
844 case crm_join_nack_quiet:
845 break;
846 default:
847 crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
848 "for join-%d", join_to,
849 crm_join_phase_str(join_node->join), current_join_id);
850 return;
851 }
852
853
854
855
856 crm_trace("Updating node name and UUID in CIB for %s", join_to);
857 tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
858 crm_xml_add(tmp1, XML_ATTR_ID, crm_peer_uuid(join_node));
859 crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
860 fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
861 free_xml(tmp1);
862
863 if (join_node->join == crm_join_nack_quiet) {
864 crm_trace("Not sending nack message to node %s with feature set older "
865 "than 3.17.0", join_to);
866 return;
867 }
868
869 join_node = crm_get_peer(0, join_to);
870 if (!crm_is_peer_active(join_node)) {
871
872
873
874
875
876
877
878
879
880 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
881 return;
882 }
883
884
885 crm_debug("%sing join-%d request from %s",
886 integrated? "Acknowledg" : "Nack", current_join_id, join_to);
887 acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
888 pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
889
890 if (integrated) {
891
892 crm_update_peer_join(__func__, join_node, crm_join_finalized);
893 pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
894
895
896
897
898
899 if (crm_remote_peer_cache_size() != 0) {
900 GHashTableIter iter;
901 crm_node_t *node = NULL;
902 xmlNode *remotes = create_xml_node(acknak, XML_CIB_TAG_NODES);
903
904 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
905 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
906 xmlNode *remote = NULL;
907
908 if (!node->conn_host) {
909 continue;
910 }
911
912 remote = create_xml_node(remotes, XML_CIB_TAG_NODE);
913 pcmk__xe_set_props(remote,
914 XML_ATTR_ID, node->uname,
915 XML_CIB_TAG_STATE, node->state,
916 PCMK__XA_CONN_HOST, node->conn_host,
917 NULL);
918 }
919 }
920 }
921 send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE);
922 free_xml(acknak);
923 return;
924 }
925
926 gboolean
927 check_join_state(enum crmd_fsa_state cur_state, const char *source)
928 {
929 static unsigned long long highest_seq = 0;
930
931 if (controld_globals.membership_id != crm_peer_seq) {
932 crm_debug("join-%d: Membership changed from %llu to %llu "
933 CRM_XS " highest=%llu state=%s for=%s",
934 current_join_id, controld_globals.membership_id, crm_peer_seq,
935 highest_seq, fsa_state2string(cur_state), source);
936 if(highest_seq < crm_peer_seq) {
937
938 highest_seq = crm_peer_seq;
939 register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
940 }
941
942 } else if (cur_state == S_INTEGRATION) {
943 if (crmd_join_phase_count(crm_join_welcomed) == 0) {
944 int count = crmd_join_phase_count(crm_join_integrated);
945
946 crm_debug("join-%d: Integration of %d peer%s complete "
947 CRM_XS " state=%s for=%s",
948 current_join_id, count, pcmk__plural_s(count),
949 fsa_state2string(cur_state), source);
950 register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
951 return TRUE;
952 }
953
954 } else if (cur_state == S_FINALIZE_JOIN) {
955 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
956 crm_debug("join-%d: Delaying finalization until we have CIB "
957 CRM_XS " state=%s for=%s",
958 current_join_id, fsa_state2string(cur_state), source);
959 return TRUE;
960
961 } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
962 int count = crmd_join_phase_count(crm_join_welcomed);
963
964 crm_debug("join-%d: Still waiting on %d welcomed node%s "
965 CRM_XS " state=%s for=%s",
966 current_join_id, count, pcmk__plural_s(count),
967 fsa_state2string(cur_state), source);
968 crmd_join_phase_log(LOG_DEBUG);
969
970 } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
971 int count = crmd_join_phase_count(crm_join_integrated);
972
973 crm_debug("join-%d: Still waiting on %d integrated node%s "
974 CRM_XS " state=%s for=%s",
975 current_join_id, count, pcmk__plural_s(count),
976 fsa_state2string(cur_state), source);
977 crmd_join_phase_log(LOG_DEBUG);
978
979 } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
980 int count = crmd_join_phase_count(crm_join_finalized);
981
982 crm_debug("join-%d: Still waiting on %d finalized node%s "
983 CRM_XS " state=%s for=%s",
984 current_join_id, count, pcmk__plural_s(count),
985 fsa_state2string(cur_state), source);
986 crmd_join_phase_log(LOG_DEBUG);
987
988 } else {
989 crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
990 current_join_id, fsa_state2string(cur_state), source);
991 register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
992 return TRUE;
993 }
994 }
995
996 return FALSE;
997 }
998
999 void
1000 do_dc_join_final(long long action,
1001 enum crmd_fsa_cause cause,
1002 enum crmd_fsa_state cur_state,
1003 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1004 {
1005 crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1006 crm_update_quorum(crm_have_quorum, TRUE);
1007 }
1008
1009 int crmd_join_phase_count(enum crm_join_phase phase)
1010 {
1011 int count = 0;
1012 crm_node_t *peer;
1013 GHashTableIter iter;
1014
1015 g_hash_table_iter_init(&iter, crm_peer_cache);
1016 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1017 if(peer->join == phase) {
1018 count++;
1019 }
1020 }
1021 return count;
1022 }
1023
1024 void crmd_join_phase_log(int level)
1025 {
1026 crm_node_t *peer;
1027 GHashTableIter iter;
1028
1029 g_hash_table_iter_init(&iter, crm_peer_cache);
1030 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1031 do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
1032 crm_join_phase_str(peer->join));
1033 }
1034 }