root/daemons/controld/controld_join_dc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. crm_update_peer_join
  2. start_join_round
  3. create_dc_message
  4. join_make_offer
  5. do_dc_join_offer_all
  6. do_dc_join_offer_one
  7. compare_int_fields
  8. do_dc_join_filter_offer
  9. do_dc_join_finalize
  10. finalize_sync_callback
  11. join_update_complete_callback
  12. do_dc_join_ack
  13. finalize_join_for
  14. check_join_state
  15. do_dc_join_final
  16. crmd_join_phase_count
  17. crmd_join_phase_log

   1 /*
   2  * Copyright 2004-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <crm/crm.h>
  13 
  14 #include <crm/msg_xml.h>
  15 #include <crm/common/xml.h>
  16 #include <crm/cluster.h>
  17 
  18 #include <pacemaker-controld.h>
  19 
  20 char *max_epoch = NULL;
  21 char *max_generation_from = NULL;
  22 xmlNode *max_generation_xml = NULL;
  23 
  24 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
  25 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
  26 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
  27 
  28 /* Numeric counter used to identify join rounds (an unsigned int would be
  29  * appropriate, except we get and set it in XML as int)
  30  */
  31 static int current_join_id = 0;
  32 
  33 unsigned long long saved_ccm_membership_id = 0;
  34 
  35 void
  36 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
  37 {
  38     enum crm_join_phase last = 0;
  39 
  40     CRM_CHECK(node != NULL, return);
  41 
  42     /* Remote nodes do not participate in joins */
  43     if (pcmk_is_set(node->flags, crm_remote_node)) {
  44         return;
  45     }
  46 
  47     last = node->join;
  48 
  49     if(phase == last) {
  50         crm_trace("Node %s join-%d phase is still %s "
  51                   CRM_XS " nodeid=%u source=%s",
  52                   node->uname, current_join_id, crm_join_phase_str(last),
  53                   node->id, source);
  54 
  55     } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
  56         node->join = phase;
  57         crm_trace("Node %s join-%d phase is now %s (was %s) "
  58                   CRM_XS " nodeid=%u source=%s",
  59                  node->uname, current_join_id, crm_join_phase_str(phase),
  60                  crm_join_phase_str(last), node->id, source);
  61 
  62     } else {
  63         crm_warn("Rejecting join-%d phase update for node %s because "
  64                  "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
  65                  current_join_id, node->uname, crm_join_phase_str(last),
  66                  crm_join_phase_str(phase), node->id, source);
  67     }
  68 }
  69 
  70 static void
  71 start_join_round(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  72 {
  73     GHashTableIter iter;
  74     crm_node_t *peer = NULL;
  75 
  76     crm_debug("Starting new join round join-%d", current_join_id);
  77 
  78     g_hash_table_iter_init(&iter, crm_peer_cache);
  79     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
  80         crm_update_peer_join(__func__, peer, crm_join_none);
  81     }
  82     if (max_generation_from != NULL) {
  83         free(max_generation_from);
  84         max_generation_from = NULL;
  85     }
  86     if (max_generation_xml != NULL) {
  87         free_xml(max_generation_xml);
  88         max_generation_xml = NULL;
  89     }
  90     controld_clear_fsa_input_flags(R_HAVE_CIB|R_CIB_ASKED);
  91 }
  92 
  93 /*!
  94  * \internal
  95  * \brief Create a join message from the DC
  96  *
  97  * \param[in] join_op  Join operation name
  98  * \param[in] host_to  Recipient of message
  99  */
 100 static xmlNode *
 101 create_dc_message(const char *join_op, const char *host_to)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
 104                                   CRM_SYSTEM_DC, NULL);
 105 
 106     /* Identify which election this is a part of */
 107     crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
 108 
 109     /* Add a field specifying whether the DC is shutting down. This keeps the
 110      * joining node from fencing the old DC if it becomes the new DC.
 111      */
 112     pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING,
 113                            pcmk_is_set(fsa_input_register, R_SHUTDOWN));
 114     return msg;
 115 }
 116 
 117 static void
 118 join_make_offer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     xmlNode *offer = NULL;
 121     crm_node_t *member = (crm_node_t *)value;
 122 
 123     CRM_ASSERT(member != NULL);
 124     if (crm_is_peer_active(member) == FALSE) {
 125         crm_info("Not making join-%d offer to inactive node %s",
 126                  current_join_id,
 127                  (member->uname? member->uname : "with unknown name"));
 128         if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
 129             /* You would think this unsafe, but in fact this plus an
 130              * active resource is what causes it to be fenced.
 131              *
 132              * Yes, this does mean that any node that dies at the same
 133              * time as the old DC and is not running resource (still)
 134              * won't be fenced.
 135              *
 136              * I'm not happy about this either.
 137              */
 138             pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
 139         }
 140         return;
 141     }
 142 
 143     if (member->uname == NULL) {
 144         crm_info("Not making join-%d offer to node uuid %s with unknown name",
 145                  current_join_id, member->uuid);
 146         return;
 147     }
 148 
 149     if (saved_ccm_membership_id != crm_peer_seq) {
 150         saved_ccm_membership_id = crm_peer_seq;
 151         crm_info("Making join-%d offers based on membership event %llu",
 152                  current_join_id, crm_peer_seq);
 153     }
 154 
 155     if(user_data && member->join > crm_join_none) {
 156         crm_info("Not making join-%d offer to already known node %s (%s)",
 157                  current_join_id, member->uname,
 158                  crm_join_phase_str(member->join));
 159         return;
 160     }
 161 
 162     crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
 163 
 164     offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
 165 
 166     // Advertise our feature set so the joining node can bail if not compatible
 167     crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
 168 
 169     crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
 170     send_cluster_message(member, crm_msg_crmd, offer, TRUE);
 171     free_xml(offer);
 172 
 173     crm_update_peer_join(__func__, member, crm_join_welcomed);
 174 }
 175 
 176 /*       A_DC_JOIN_OFFER_ALL    */
 177 void
 178 do_dc_join_offer_all(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 179                      enum crmd_fsa_cause cause,
 180                      enum crmd_fsa_state cur_state,
 181                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 182 {
 183     int count;
 184 
 185     /* Reset everyone's status back to down or in_ccm in the CIB.
 186      * Any nodes that are active in the CIB but not in the cluster membership
 187      * will be seen as offline by the scheduler anyway.
 188      */
 189     current_join_id++;
 190     start_join_round();
 191     // do_update_cib_nodes(TRUE, __func__);
 192 
 193     update_dc(NULL);
 194     if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
 195         crm_info("A new node joined the cluster");
 196     }
 197     g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
 198 
 199     count = crmd_join_phase_count(crm_join_welcomed);
 200     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 201              current_join_id, count, pcmk__plural_s(count));
 202 
 203     // Don't waste time by invoking the scheduler yet
 204 }
 205 
 206 /*       A_DC_JOIN_OFFER_ONE    */
 207 void
 208 do_dc_join_offer_one(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 209                      enum crmd_fsa_cause cause,
 210                      enum crmd_fsa_state cur_state,
 211                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 212 {
 213     crm_node_t *member;
 214     ha_msg_input_t *welcome = NULL;
 215     int count;
 216     const char *join_to = NULL;
 217 
 218     if (msg_data->data == NULL) {
 219         crm_info("Making join-%d offers to any unconfirmed nodes "
 220                  "because an unknown node joined", current_join_id);
 221         g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
 222         check_join_state(cur_state, __func__);
 223         return;
 224     }
 225 
 226     welcome = fsa_typed_data(fsa_dt_ha_msg);
 227     if (welcome == NULL) {
 228         // fsa_typed_data() already logged an error
 229         return;
 230     }
 231 
 232     join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
 233     if (join_to == NULL) {
 234         crm_err("Can't make join-%d offer to unknown node", current_join_id);
 235         return;
 236     }
 237     member = crm_get_peer(0, join_to);
 238 
 239     /* It is possible that a node will have been sick or starting up when the
 240      * original offer was made. However, it will either re-announce itself in
 241      * due course, or we can re-store the original offer on the client.
 242      */
 243 
 244     crm_update_peer_join(__func__, member, crm_join_none);
 245     join_make_offer(NULL, member, NULL);
 246 
 247     /* If the offer isn't to the local node, make an offer to the local node as
 248      * well, to ensure the correct value for max_generation_from.
 249      */
 250     if (strcmp(join_to, fsa_our_uname) != 0) {
 251         member = crm_get_peer(0, fsa_our_uname);
 252         join_make_offer(NULL, member, NULL);
 253     }
 254 
 255     /* This was a genuine join request; cancel any existing transition and
 256      * invoke the scheduler.
 257      */
 258     abort_transition(INFINITY, pcmk__graph_restart, "Node join", NULL);
 259 
 260     count = crmd_join_phase_count(crm_join_welcomed);
 261     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 262              current_join_id, count, pcmk__plural_s(count));
 263 
 264     // Don't waste time by invoking the scheduler yet
 265 }
 266 
 267 static int
 268 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
     /* [previous][next][first][last][top][bottom][index][help] */
 269 {
 270     const char *elem_l = crm_element_value(left, field);
 271     const char *elem_r = crm_element_value(right, field);
 272 
 273     long long int_elem_l;
 274     long long int_elem_r;
 275 
 276     pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
 277     pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
 278 
 279     if (int_elem_l < int_elem_r) {
 280         return -1;
 281 
 282     } else if (int_elem_l > int_elem_r) {
 283         return 1;
 284     }
 285 
 286     return 0;
 287 }
 288 
 289 /*       A_DC_JOIN_PROCESS_REQ  */
 290 void
 291 do_dc_join_filter_offer(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 292                         enum crmd_fsa_cause cause,
 293                         enum crmd_fsa_state cur_state,
 294                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 295 {
 296     xmlNode *generation = NULL;
 297 
 298     int cmp = 0;
 299     int join_id = -1;
 300     int count = 0;
 301     gboolean ack_nack_bool = TRUE;
 302     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 303 
 304     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
 305     const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
 306     const char *join_version = crm_element_value(join_ack->msg,
 307                                                  XML_ATTR_CRM_VERSION);
 308     crm_node_t *join_node = NULL;
 309 
 310     if (join_from == NULL) {
 311         crm_err("Ignoring invalid join request without node name");
 312         return;
 313     }
 314     join_node = crm_get_peer(0, join_from);
 315 
 316     crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
 317     if (join_id != current_join_id) {
 318         crm_debug("Ignoring join-%d request from %s because we are on join-%d",
 319                   join_id, join_from, current_join_id);
 320         check_join_state(cur_state, __func__);
 321         return;
 322     }
 323 
 324     generation = join_ack->xml;
 325     if (max_generation_xml != NULL && generation != NULL) {
 326         int lpc = 0;
 327 
 328         const char *attributes[] = {
 329             XML_ATTR_GENERATION_ADMIN,
 330             XML_ATTR_GENERATION,
 331             XML_ATTR_NUMUPDATES,
 332         };
 333 
 334         for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
 335             cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
 336         }
 337     }
 338 
 339     if (ref == NULL) {
 340         ref = "none"; // for logging only
 341     }
 342 
 343     if (crm_is_peer_active(join_node) == FALSE) {
 344         crm_err("Rejecting join-%d request from inactive node %s "
 345                 CRM_XS " ref=%s", join_id, join_from, ref);
 346         ack_nack_bool = FALSE;
 347 
 348     } else if (generation == NULL) {
 349         crm_err("Rejecting invalid join-%d request from node %s "
 350                 "missing CIB generation " CRM_XS " ref=%s",
 351                 join_id, join_from, ref);
 352         ack_nack_bool = FALSE;
 353 
 354     } else if ((join_version == NULL)
 355                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
 356         crm_err("Rejecting join-%d request from node %s because feature set %s"
 357                 " is incompatible with ours (%s) " CRM_XS " ref=%s",
 358                 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
 359                 CRM_FEATURE_SET, ref);
 360         ack_nack_bool = FALSE;
 361 
 362     } else if (max_generation_xml == NULL) {
 363         crm_debug("Accepting join-%d request from %s "
 364                   "(with first CIB generation) " CRM_XS " ref=%s",
 365                   join_id, join_from, ref);
 366         max_generation_xml = copy_xml(generation);
 367         max_generation_from = strdup(join_from);
 368 
 369     } else if (cmp < 0 || (cmp == 0 && pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei))) {
 370         crm_debug("Accepting join-%d request from %s (with better "
 371                   "CIB generation than current best from %s) " CRM_XS " ref=%s",
 372                   join_id, join_from, max_generation_from, ref);
 373         crm_log_xml_debug(max_generation_xml, "Old max generation");
 374         crm_log_xml_debug(generation, "New max generation");
 375 
 376         pcmk__str_update(&max_generation_from, join_from);
 377 
 378         free_xml(max_generation_xml);
 379         max_generation_xml = copy_xml(join_ack->xml);
 380 
 381     } else {
 382         crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
 383                   join_id, join_from, ref);
 384     }
 385 
 386     if (ack_nack_bool == FALSE) {
 387         crm_update_peer_join(__func__, join_node, crm_join_nack);
 388         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
 389     } else {
 390         crm_update_peer_join(__func__, join_node, crm_join_integrated);
 391         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 392     }
 393 
 394     count = crmd_join_phase_count(crm_join_integrated);
 395     crm_debug("%d node%s currently integrated in join-%d",
 396               count, pcmk__plural_s(count), join_id);
 397 
 398     if (check_join_state(cur_state, __func__) == FALSE) {
 399         // Don't waste time by invoking the scheduler yet
 400         count = crmd_join_phase_count(crm_join_welcomed);
 401         crm_debug("Waiting on join-%d requests from %d outstanding node%s",
 402                   join_id, count, pcmk__plural_s(count));
 403     }
 404 }
 405 
 406 /*      A_DC_JOIN_FINALIZE      */
 407 void
 408 do_dc_join_finalize(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 409                     enum crmd_fsa_cause cause,
 410                     enum crmd_fsa_state cur_state,
 411                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 412 {
 413     char *sync_from = NULL;
 414     int rc = pcmk_ok;
 415     int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
 416     int count_integrated = crmd_join_phase_count(crm_join_integrated);
 417 
 418     /* This we can do straight away and avoid clients timing us out
 419      *  while we compute the latest CIB
 420      */
 421     if (count_welcomed != 0) {
 422         crm_debug("Waiting on join-%d requests from %d outstanding node%s "
 423                   "before finalizing join", current_join_id, count_welcomed,
 424                   pcmk__plural_s(count_welcomed));
 425         crmd_join_phase_log(LOG_DEBUG);
 426         /* crmd_fsa_stall(FALSE); Needed? */
 427         return;
 428 
 429     } else if (count_integrated == 0) {
 430         crm_debug("Finalization not needed for join-%d at the current time",
 431                   current_join_id);
 432         crmd_join_phase_log(LOG_DEBUG);
 433         check_join_state(fsa_state, __func__);
 434         return;
 435     }
 436 
 437     controld_clear_fsa_input_flags(R_HAVE_CIB);
 438     if (pcmk__str_eq(max_generation_from, fsa_our_uname, pcmk__str_null_matches | pcmk__str_casei)) {
 439         controld_set_fsa_input_flags(R_HAVE_CIB);
 440     }
 441 
 442     if (pcmk_is_set(fsa_input_register, R_IN_TRANSITION)) {
 443         crm_warn("Delaying join-%d finalization while transition in progress",
 444                  current_join_id);
 445         crmd_join_phase_log(LOG_DEBUG);
 446         crmd_fsa_stall(FALSE);
 447         return;
 448     }
 449 
 450     if (max_generation_from && !pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
 451         /* ask for the agreed best CIB */
 452         sync_from = strdup(max_generation_from);
 453         controld_set_fsa_input_flags(R_CIB_ASKED);
 454         crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
 455                    current_join_id, count_integrated,
 456                    pcmk__plural_s(count_integrated), sync_from);
 457         crm_log_xml_notice(max_generation_xml, "Requested CIB version");
 458 
 459     } else {
 460         /* Send _our_ CIB out to everyone */
 461         sync_from = strdup(fsa_our_uname);
 462         crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
 463                   current_join_id, count_integrated,
 464                   pcmk__plural_s(count_integrated));
 465         crm_log_xml_debug(max_generation_xml, "Requested CIB version");
 466     }
 467     crmd_join_phase_log(LOG_DEBUG);
 468 
 469     rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
 470     fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
 471 }
 472 
 473 void
 474 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 475 {
 476     CRM_LOG_ASSERT(-EPERM != rc);
 477     controld_clear_fsa_input_flags(R_CIB_ASKED);
 478     if (rc != pcmk_ok) {
 479         do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
 480                    "Could not sync CIB from %s in join-%d: %s",
 481                    (char *) user_data, current_join_id, pcmk_strerror(rc));
 482 
 483         /* restart the whole join process */
 484         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
 485                                __func__);
 486 
 487     } else if (!AM_I_DC) {
 488         crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
 489 
 490     } else if (fsa_state != S_FINALIZE_JOIN) {
 491         crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)",
 492                   current_join_id, fsa_state2string(fsa_state));
 493 
 494     } else {
 495         controld_set_fsa_input_flags(R_HAVE_CIB);
 496         controld_clear_fsa_input_flags(R_CIB_ASKED);
 497 
 498         /* make sure dc_uuid is re-set to us */
 499         if (check_join_state(fsa_state, __func__) == FALSE) {
 500             int count_integrated = crmd_join_phase_count(crm_join_integrated);
 501 
 502             crm_debug("Notifying %d node%s of join-%d results",
 503                       count_integrated, pcmk__plural_s(count_integrated),
 504                       current_join_id);
 505             g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
 506         }
 507     }
 508 }
 509 
 510 static void
 511 join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 512 {
 513     fsa_data_t *msg_data = NULL;
 514 
 515     if (rc == pcmk_ok) {
 516         crm_debug("join-%d node history update (via CIB call %d) complete",
 517                   current_join_id, call_id);
 518         check_join_state(fsa_state, __func__);
 519 
 520     } else {
 521         crm_err("join-%d node history update (via CIB call %d) failed: %s "
 522                 "(next transition may determine resource status incorrectly)",
 523                 current_join_id, call_id, pcmk_strerror(rc));
 524         crm_log_xml_debug(msg, "failed");
 525         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 526     }
 527 }
 528 
 529 /*      A_DC_JOIN_PROCESS_ACK   */
 530 void
 531 do_dc_join_ack(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 532                enum crmd_fsa_cause cause,
 533                enum crmd_fsa_state cur_state,
 534                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 535 {
 536     int join_id = -1;
 537     int call_id = 0;
 538     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 539     enum controld_section_e section = controld_section_lrm;
 540 
 541     const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
 542     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
 543     crm_node_t *peer = NULL;
 544 
 545     // Sanity checks
 546     if (join_from == NULL) {
 547         crm_warn("Ignoring message received without node identification");
 548         return;
 549     }
 550     if (op == NULL) {
 551         crm_warn("Ignoring message received from %s without task", join_from);
 552         return;
 553     }
 554 
 555     if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
 556         crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
 557                   op, join_from, CRM_OP_JOIN_CONFIRM);
 558         return;
 559     }
 560 
 561     if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
 562         crm_warn("Ignoring join confirmation from %s without valid join ID",
 563                  join_from);
 564         return;
 565     }
 566 
 567     peer = crm_get_peer(0, join_from);
 568     if (peer->join != crm_join_finalized) {
 569         crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
 570                  "(currently %s not %s)",
 571                  join_id, join_from, crm_join_phase_str(peer->join),
 572                  crm_join_phase_str(crm_join_finalized));
 573         return;
 574     }
 575 
 576     if (join_id != current_join_id) {
 577         crm_err("Rejecting join-%d confirmation from %s "
 578                 "because currently on join-%d",
 579                 join_id, join_from, current_join_id);
 580         crm_update_peer_join(__func__, peer, crm_join_nack);
 581         return;
 582     }
 583 
 584     crm_update_peer_join(__func__, peer, crm_join_confirmed);
 585 
 586     /* Update CIB with node's current executor state. A new transition will be
 587      * triggered later, when the CIB notifies us of the change.
 588      */
 589     if (controld_shutdown_lock_enabled) {
 590         section = controld_section_lrm_unlocked;
 591     }
 592     controld_delete_node_state(join_from, section, cib_scope_local);
 593     if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) {
 594         xmlNode *now_dc_lrmd_state = controld_query_executor_state();
 595 
 596         if (now_dc_lrmd_state != NULL) {
 597             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
 598                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 599             free_xml(now_dc_lrmd_state);
 600             crm_debug("Updating local node history for join-%d "
 601                       "from query result (via CIB call %d)", join_id, call_id);
 602         } else {
 603             fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
 604                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 605             crm_warn("Updating local node history from join-%d confirmation "
 606                      "because query failed (via CIB call %d)", join_id, call_id);
 607         }
 608     } else {
 609         fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
 610            cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 611         crm_debug("Updating node history for %s from join-%d confirmation "
 612                   "(via CIB call %d)", join_from, join_id, call_id);
 613     }
 614     fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
 615 }
 616 
 617 void
 618 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 619 {
 620     xmlNode *acknak = NULL;
 621     xmlNode *tmp1 = NULL;
 622     crm_node_t *join_node = value;
 623     const char *join_to = join_node->uname;
 624 
 625     if(join_node->join != crm_join_integrated) {
 626         crm_trace("Not updating non-integrated node %s (%s) for join-%d",
 627                   join_to, crm_join_phase_str(join_node->join),
 628                   current_join_id);
 629         return;
 630     }
 631 
 632     crm_trace("Updating node state for %s", join_to);
 633     tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
 634     set_uuid(tmp1, XML_ATTR_UUID, join_node);
 635     crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
 636     fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
 637     free_xml(tmp1);
 638 
 639     join_node = crm_get_peer(0, join_to);
 640     if (crm_is_peer_active(join_node) == FALSE) {
 641         /*
 642          * NACK'ing nodes that the membership layer doesn't know about yet
 643          * simply creates more churn
 644          *
 645          * Better to leave them waiting and let the join restart when
 646          * the new membership event comes in
 647          *
 648          * All other NACKs (due to versions etc) should still be processed
 649          */
 650         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
 651         return;
 652     }
 653 
 654     // Acknowledge node's join request
 655     crm_debug("Acknowledging join-%d request from %s",
 656               current_join_id, join_to);
 657     acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
 658     pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, true);
 659     crm_update_peer_join(__func__, join_node, crm_join_finalized);
 660     pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 661 
 662     send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE);
 663     free_xml(acknak);
 664     return;
 665 }
 666 
 667 gboolean
 668 check_join_state(enum crmd_fsa_state cur_state, const char *source)
     /* [previous][next][first][last][top][bottom][index][help] */
 669 {
 670     static unsigned long long highest_seq = 0;
 671 
 672     if (saved_ccm_membership_id != crm_peer_seq) {
 673         crm_debug("join-%d: Membership changed from %llu to %llu "
 674                   CRM_XS " highest=%llu state=%s for=%s",
 675                   current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq,
 676                   fsa_state2string(cur_state), source);
 677         if(highest_seq < crm_peer_seq) {
 678             /* Don't spam the FSA with duplicates */
 679             highest_seq = crm_peer_seq;
 680             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
 681         }
 682 
 683     } else if (cur_state == S_INTEGRATION) {
 684         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 685             int count = crmd_join_phase_count(crm_join_integrated);
 686 
 687             crm_debug("join-%d: Integration of %d peer%s complete "
 688                       CRM_XS " state=%s for=%s",
 689                       current_join_id, count, pcmk__plural_s(count),
 690                       fsa_state2string(cur_state), source);
 691             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
 692             return TRUE;
 693         }
 694 
 695     } else if (cur_state == S_FINALIZE_JOIN) {
 696         if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
 697             crm_debug("join-%d: Delaying finalization until we have CIB "
 698                       CRM_XS " state=%s for=%s",
 699                       current_join_id, fsa_state2string(cur_state), source);
 700             return TRUE;
 701 
 702         } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
 703             int count = crmd_join_phase_count(crm_join_welcomed);
 704 
 705             crm_debug("join-%d: Still waiting on %d welcomed node%s "
 706                       CRM_XS " state=%s for=%s",
 707                       current_join_id, count, pcmk__plural_s(count),
 708                       fsa_state2string(cur_state), source);
 709             crmd_join_phase_log(LOG_DEBUG);
 710 
 711         } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
 712             int count = crmd_join_phase_count(crm_join_integrated);
 713 
 714             crm_debug("join-%d: Still waiting on %d integrated node%s "
 715                       CRM_XS " state=%s for=%s",
 716                       current_join_id, count, pcmk__plural_s(count),
 717                       fsa_state2string(cur_state), source);
 718             crmd_join_phase_log(LOG_DEBUG);
 719 
 720         } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
 721             int count = crmd_join_phase_count(crm_join_finalized);
 722 
 723             crm_debug("join-%d: Still waiting on %d finalized node%s "
 724                       CRM_XS " state=%s for=%s",
 725                       current_join_id, count, pcmk__plural_s(count),
 726                       fsa_state2string(cur_state), source);
 727             crmd_join_phase_log(LOG_DEBUG);
 728 
 729         } else {
 730             crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
 731                       current_join_id, fsa_state2string(cur_state), source);
 732             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
 733             return TRUE;
 734         }
 735     }
 736 
 737     return FALSE;
 738 }
 739 
 740 void
 741 do_dc_join_final(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 742                  enum crmd_fsa_cause cause,
 743                  enum crmd_fsa_state cur_state,
 744                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 745 {
 746     crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
 747     crm_update_quorum(crm_have_quorum, TRUE);
 748 }
 749 
 750 int crmd_join_phase_count(enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
 751 {
 752     int count = 0;
 753     crm_node_t *peer;
 754     GHashTableIter iter;
 755 
 756     g_hash_table_iter_init(&iter, crm_peer_cache);
 757     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 758         if(peer->join == phase) {
 759             count++;
 760         }
 761     }
 762     return count;
 763 }
 764 
 765 void crmd_join_phase_log(int level)
     /* [previous][next][first][last][top][bottom][index][help] */
 766 {
 767     crm_node_t *peer;
 768     GHashTableIter iter;
 769 
 770     g_hash_table_iter_init(&iter, crm_peer_cache);
 771     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 772         do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
 773                    crm_join_phase_str(peer->join));
 774     }
 775 }

/* [previous][next][first][last][top][bottom][index][help] */