root/daemons/controld/controld_join_dc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. crm_update_peer_join
  2. start_join_round
  3. create_dc_message
  4. join_make_offer
  5. do_dc_join_offer_all
  6. do_dc_join_offer_one
  7. compare_int_fields
  8. do_dc_join_filter_offer
  9. do_dc_join_finalize
  10. finalize_sync_callback
  11. join_update_complete_callback
  12. do_dc_join_ack
  13. finalize_join_for
  14. check_join_state
  15. do_dc_join_final
  16. crmd_join_phase_count
  17. crmd_join_phase_log

   1 /*
   2  * Copyright 2004-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <crm/crm.h>
  13 
  14 #include <crm/msg_xml.h>
  15 #include <crm/common/xml.h>
  16 #include <crm/cluster.h>
  17 
  18 #include <pacemaker-controld.h>
  19 
  20 char *max_epoch = NULL;
  21 char *max_generation_from = NULL;
  22 xmlNode *max_generation_xml = NULL;
  23 
  24 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
  25 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
  26 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
  27 
  28 /* Numeric counter used to identify join rounds (an unsigned int would be
  29  * appropriate, except we get and set it in XML as int)
  30  */
  31 static int current_join_id = 0;
  32 
  33 unsigned long long saved_ccm_membership_id = 0;
  34 
  35 void
  36 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
  37 {
  38     enum crm_join_phase last = 0;
  39 
  40     CRM_CHECK(node != NULL, return);
  41 
  42     /* Remote nodes do not participate in joins */
  43     if (pcmk_is_set(node->flags, crm_remote_node)) {
  44         return;
  45     }
  46 
  47     last = node->join;
  48 
  49     if(phase == last) {
  50         crm_trace("Node %s join-%d phase is still %s "
  51                   CRM_XS " nodeid=%u source=%s",
  52                   node->uname, current_join_id, crm_join_phase_str(last),
  53                   node->id, source);
  54 
  55     } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
  56         node->join = phase;
  57         crm_trace("Node %s join-%d phase is now %s (was %s) "
  58                   CRM_XS " nodeid=%u source=%s",
  59                  node->uname, current_join_id, crm_join_phase_str(phase),
  60                  crm_join_phase_str(last), node->id, source);
  61 
  62     } else {
  63         crm_warn("Rejecting join-%d phase update for node %s because "
  64                  "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
  65                  current_join_id, node->uname, crm_join_phase_str(last),
  66                  crm_join_phase_str(phase), node->id, source);
  67     }
  68 }
  69 
  70 static void
  71 start_join_round(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  72 {
  73     GHashTableIter iter;
  74     crm_node_t *peer = NULL;
  75 
  76     crm_debug("Starting new join round join-%d", current_join_id);
  77 
  78     g_hash_table_iter_init(&iter, crm_peer_cache);
  79     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
  80         crm_update_peer_join(__func__, peer, crm_join_none);
  81     }
  82     if (max_generation_from != NULL) {
  83         free(max_generation_from);
  84         max_generation_from = NULL;
  85     }
  86     if (max_generation_xml != NULL) {
  87         free_xml(max_generation_xml);
  88         max_generation_xml = NULL;
  89     }
  90     controld_clear_fsa_input_flags(R_HAVE_CIB|R_CIB_ASKED);
  91 }
  92 
  93 /*!
  94  * \internal
  95  * \brief Create a join message from the DC
  96  *
  97  * \param[in] join_op  Join operation name
  98  * \param[in] host_to  Recipient of message
  99  */
 100 static xmlNode *
 101 create_dc_message(const char *join_op, const char *host_to)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
 104                                   CRM_SYSTEM_DC, NULL);
 105 
 106     /* Identify which election this is a part of */
 107     crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
 108 
 109     /* Add a field specifying whether the DC is shutting down. This keeps the
 110      * joining node from fencing the old DC if it becomes the new DC.
 111      */
 112     crm_xml_add_boolean(msg, F_CRM_DC_LEAVING,
 113                         pcmk_is_set(fsa_input_register, R_SHUTDOWN));
 114     return msg;
 115 }
 116 
 117 static void
 118 join_make_offer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     xmlNode *offer = NULL;
 121     crm_node_t *member = (crm_node_t *)value;
 122 
 123     CRM_ASSERT(member != NULL);
 124     if (crm_is_peer_active(member) == FALSE) {
 125         crm_info("Not making join-%d offer to inactive node %s",
 126                  current_join_id,
 127                  (member->uname? member->uname : "with unknown name"));
 128         if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
 129             /* You would think this unsafe, but in fact this plus an
 130              * active resource is what causes it to be fenced.
 131              *
 132              * Yes, this does mean that any node that dies at the same
 133              * time as the old DC and is not running resource (still)
 134              * won't be fenced.
 135              *
 136              * I'm not happy about this either.
 137              */
 138             pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
 139         }
 140         return;
 141     }
 142 
 143     if (member->uname == NULL) {
 144         crm_info("Not making join-%d offer to node uuid %s with unknown name",
 145                  current_join_id, member->uuid);
 146         return;
 147     }
 148 
 149     if (saved_ccm_membership_id != crm_peer_seq) {
 150         saved_ccm_membership_id = crm_peer_seq;
 151         crm_info("Making join-%d offers based on membership event %llu",
 152                  current_join_id, crm_peer_seq);
 153     }
 154 
 155     if(user_data && member->join > crm_join_none) {
 156         crm_info("Not making join-%d offer to already known node %s (%s)",
 157                  current_join_id, member->uname,
 158                  crm_join_phase_str(member->join));
 159         return;
 160     }
 161 
 162     crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
 163 
 164     offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
 165 
 166     // Advertise our feature set so the joining node can bail if not compatible
 167     crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
 168 
 169     crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
 170     send_cluster_message(member, crm_msg_crmd, offer, TRUE);
 171     free_xml(offer);
 172 
 173     crm_update_peer_join(__func__, member, crm_join_welcomed);
 174 }
 175 
 176 /*       A_DC_JOIN_OFFER_ALL    */
 177 void
 178 do_dc_join_offer_all(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 179                      enum crmd_fsa_cause cause,
 180                      enum crmd_fsa_state cur_state,
 181                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 182 {
 183     int count;
 184 
 185     /* Reset everyone's status back to down or in_ccm in the CIB.
 186      * Any nodes that are active in the CIB but not in the cluster membership
 187      * will be seen as offline by the scheduler anyway.
 188      */
 189     current_join_id++;
 190     start_join_round();
 191     // do_update_cib_nodes(TRUE, __func__);
 192 
 193     update_dc(NULL);
 194     if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
 195         crm_info("A new node joined the cluster");
 196     }
 197     g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
 198 
 199     count = crmd_join_phase_count(crm_join_welcomed);
 200     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 201              current_join_id, count, pcmk__plural_s(count));
 202 
 203     // Don't waste time by invoking the scheduler yet
 204 }
 205 
 206 /*       A_DC_JOIN_OFFER_ONE    */
 207 void
 208 do_dc_join_offer_one(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 209                      enum crmd_fsa_cause cause,
 210                      enum crmd_fsa_state cur_state,
 211                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 212 {
 213     crm_node_t *member;
 214     ha_msg_input_t *welcome = NULL;
 215     int count;
 216     const char *join_to = NULL;
 217 
 218     if (msg_data->data == NULL) {
 219         crm_info("Making join-%d offers to any unconfirmed nodes "
 220                  "because an unknown node joined", current_join_id);
 221         g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
 222         check_join_state(cur_state, __func__);
 223         return;
 224     }
 225 
 226     welcome = fsa_typed_data(fsa_dt_ha_msg);
 227     if (welcome == NULL) {
 228         // fsa_typed_data() already logged an error
 229         return;
 230     }
 231 
 232     join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
 233     if (join_to == NULL) {
 234         crm_err("Can't make join-%d offer to unknown node", current_join_id);
 235         return;
 236     }
 237     member = crm_get_peer(0, join_to);
 238 
 239     /* It is possible that a node will have been sick or starting up when the
 240      * original offer was made. However, it will either re-announce itself in
 241      * due course, or we can re-store the original offer on the client.
 242      */
 243 
 244     crm_update_peer_join(__func__, member, crm_join_none);
 245     join_make_offer(NULL, member, NULL);
 246 
 247     /* If the offer isn't to the local node, make an offer to the local node as
 248      * well, to ensure the correct value for max_generation_from.
 249      */
 250     if (strcmp(join_to, fsa_our_uname) != 0) {
 251         member = crm_get_peer(0, fsa_our_uname);
 252         join_make_offer(NULL, member, NULL);
 253     }
 254 
 255     /* This was a genuine join request; cancel any existing transition and
 256      * invoke the scheduler.
 257      */
 258     abort_transition(INFINITY, tg_restart, "Node join", NULL);
 259 
 260     count = crmd_join_phase_count(crm_join_welcomed);
 261     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 262              current_join_id, count, pcmk__plural_s(count));
 263 
 264     // Don't waste time by invoking the scheduler yet
 265 }
 266 
 267 static int
 268 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
     /* [previous][next][first][last][top][bottom][index][help] */
 269 {
 270     const char *elem_l = crm_element_value(left, field);
 271     const char *elem_r = crm_element_value(right, field);
 272 
 273     long long int_elem_l;
 274     long long int_elem_r;
 275 
 276     pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
 277     pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
 278 
 279     if (int_elem_l < int_elem_r) {
 280         return -1;
 281 
 282     } else if (int_elem_l > int_elem_r) {
 283         return 1;
 284     }
 285 
 286     return 0;
 287 }
 288 
 289 /*       A_DC_JOIN_PROCESS_REQ  */
 290 void
 291 do_dc_join_filter_offer(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 292                         enum crmd_fsa_cause cause,
 293                         enum crmd_fsa_state cur_state,
 294                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 295 {
 296     xmlNode *generation = NULL;
 297 
 298     int cmp = 0;
 299     int join_id = -1;
 300     int count = 0;
 301     gboolean ack_nack_bool = TRUE;
 302     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 303 
 304     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
 305     const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
 306     const char *join_version = crm_element_value(join_ack->msg,
 307                                                  XML_ATTR_CRM_VERSION);
 308     crm_node_t *join_node = NULL;
 309 
 310     if (join_from == NULL) {
 311         crm_err("Ignoring invalid join request without node name");
 312         return;
 313     }
 314     join_node = crm_get_peer(0, join_from);
 315 
 316     crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
 317     if (join_id != current_join_id) {
 318         crm_debug("Ignoring join-%d request from %s because we are on join-%d",
 319                   join_id, join_from, current_join_id);
 320         check_join_state(cur_state, __func__);
 321         return;
 322     }
 323 
 324     generation = join_ack->xml;
 325     if (max_generation_xml != NULL && generation != NULL) {
 326         int lpc = 0;
 327 
 328         const char *attributes[] = {
 329             XML_ATTR_GENERATION_ADMIN,
 330             XML_ATTR_GENERATION,
 331             XML_ATTR_NUMUPDATES,
 332         };
 333 
 334         for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
 335             cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
 336         }
 337     }
 338 
 339     if (ref == NULL) {
 340         ref = "none"; // for logging only
 341     }
 342 
 343     if (crm_is_peer_active(join_node) == FALSE) {
 344         crm_err("Rejecting join-%d request from inactive node %s "
 345                 CRM_XS " ref=%s", join_id, join_from, ref);
 346         ack_nack_bool = FALSE;
 347 
 348     } else if (generation == NULL) {
 349         crm_err("Rejecting invalid join-%d request from node %s "
 350                 "missing CIB generation " CRM_XS " ref=%s",
 351                 join_id, join_from, ref);
 352         ack_nack_bool = FALSE;
 353 
 354     } else if ((join_version == NULL)
 355                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
 356         crm_err("Rejecting join-%d request from node %s because feature set %s"
 357                 " is incompatible with ours (%s) " CRM_XS " ref=%s",
 358                 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
 359                 CRM_FEATURE_SET, ref);
 360         ack_nack_bool = FALSE;
 361 
 362     } else if (max_generation_xml == NULL) {
 363         crm_debug("Accepting join-%d request from %s "
 364                   "(with first CIB generation) " CRM_XS " ref=%s",
 365                   join_id, join_from, ref);
 366         max_generation_xml = copy_xml(generation);
 367         max_generation_from = strdup(join_from);
 368 
 369     } else if (cmp < 0 || (cmp == 0 && pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei))) {
 370         crm_debug("Accepting join-%d request from %s (with better "
 371                   "CIB generation than current best from %s) " CRM_XS " ref=%s",
 372                   join_id, join_from, max_generation_from, ref);
 373         crm_log_xml_debug(max_generation_xml, "Old max generation");
 374         crm_log_xml_debug(generation, "New max generation");
 375 
 376         free(max_generation_from);
 377         free_xml(max_generation_xml);
 378 
 379         max_generation_from = strdup(join_from);
 380         max_generation_xml = copy_xml(join_ack->xml);
 381 
 382     } else {
 383         crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
 384                   join_id, join_from, ref);
 385     }
 386 
 387     if (ack_nack_bool == FALSE) {
 388         crm_update_peer_join(__func__, join_node, crm_join_nack);
 389         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
 390     } else {
 391         crm_update_peer_join(__func__, join_node, crm_join_integrated);
 392         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 393     }
 394 
 395     count = crmd_join_phase_count(crm_join_integrated);
 396     crm_debug("%d node%s currently integrated in join-%d",
 397               count, pcmk__plural_s(count), join_id);
 398 
 399     if (check_join_state(cur_state, __func__) == FALSE) {
 400         // Don't waste time by invoking the scheduler yet
 401         count = crmd_join_phase_count(crm_join_welcomed);
 402         crm_debug("Waiting on join-%d requests from %d outstanding node%s",
 403                   join_id, count, pcmk__plural_s(count));
 404     }
 405 }
 406 
 407 /*      A_DC_JOIN_FINALIZE      */
 408 void
 409 do_dc_join_finalize(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 410                     enum crmd_fsa_cause cause,
 411                     enum crmd_fsa_state cur_state,
 412                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 413 {
 414     char *sync_from = NULL;
 415     int rc = pcmk_ok;
 416     int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
 417     int count_integrated = crmd_join_phase_count(crm_join_integrated);
 418 
 419     /* This we can do straight away and avoid clients timing us out
 420      *  while we compute the latest CIB
 421      */
 422     if (count_welcomed != 0) {
 423         crm_debug("Waiting on join-%d requests from %d outstanding node%s "
 424                   "before finalizing join", current_join_id, count_welcomed,
 425                   pcmk__plural_s(count_welcomed));
 426         crmd_join_phase_log(LOG_DEBUG);
 427         /* crmd_fsa_stall(FALSE); Needed? */
 428         return;
 429 
 430     } else if (count_integrated == 0) {
 431         crm_debug("Finalization not needed for join-%d at the current time",
 432                   current_join_id);
 433         crmd_join_phase_log(LOG_DEBUG);
 434         check_join_state(fsa_state, __func__);
 435         return;
 436     }
 437 
 438     controld_clear_fsa_input_flags(R_HAVE_CIB);
 439     if (pcmk__str_eq(max_generation_from, fsa_our_uname, pcmk__str_null_matches | pcmk__str_casei)) {
 440         controld_set_fsa_input_flags(R_HAVE_CIB);
 441     }
 442 
 443     if (pcmk_is_set(fsa_input_register, R_IN_TRANSITION)) {
 444         crm_warn("Delaying join-%d finalization while transition in progress",
 445                  current_join_id);
 446         crmd_join_phase_log(LOG_DEBUG);
 447         crmd_fsa_stall(FALSE);
 448         return;
 449     }
 450 
 451     if (max_generation_from && !pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
 452         /* ask for the agreed best CIB */
 453         sync_from = strdup(max_generation_from);
 454         controld_set_fsa_input_flags(R_CIB_ASKED);
 455         crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
 456                    current_join_id, count_integrated,
 457                    pcmk__plural_s(count_integrated), sync_from);
 458         crm_log_xml_notice(max_generation_xml, "Requested CIB version");
 459 
 460     } else {
 461         /* Send _our_ CIB out to everyone */
 462         sync_from = strdup(fsa_our_uname);
 463         crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
 464                   current_join_id, count_integrated,
 465                   pcmk__plural_s(count_integrated));
 466         crm_log_xml_debug(max_generation_xml, "Requested CIB version");
 467     }
 468     crmd_join_phase_log(LOG_DEBUG);
 469 
 470     rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
 471     fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
 472 }
 473 
 474 void
 475 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 476 {
 477     CRM_LOG_ASSERT(-EPERM != rc);
 478     controld_clear_fsa_input_flags(R_CIB_ASKED);
 479     if (rc != pcmk_ok) {
 480         do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
 481                    "Could not sync CIB from %s in join-%d: %s",
 482                    (char *) user_data, current_join_id, pcmk_strerror(rc));
 483 
 484         /* restart the whole join process */
 485         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
 486                                __func__);
 487 
 488     } else if (!AM_I_DC) {
 489         crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
 490 
 491     } else if (fsa_state != S_FINALIZE_JOIN) {
 492         crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)",
 493                   current_join_id, fsa_state2string(fsa_state));
 494 
 495     } else {
 496         controld_set_fsa_input_flags(R_HAVE_CIB);
 497         controld_clear_fsa_input_flags(R_CIB_ASKED);
 498 
 499         /* make sure dc_uuid is re-set to us */
 500         if (check_join_state(fsa_state, __func__) == FALSE) {
 501             int count_integrated = crmd_join_phase_count(crm_join_integrated);
 502 
 503             crm_debug("Notifying %d node%s of join-%d results",
 504                       count_integrated, pcmk__plural_s(count_integrated),
 505                       current_join_id);
 506             g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
 507         }
 508     }
 509 }
 510 
 511 static void
 512 join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 513 {
 514     fsa_data_t *msg_data = NULL;
 515 
 516     if (rc == pcmk_ok) {
 517         crm_debug("join-%d node history update (via CIB call %d) complete",
 518                   current_join_id, call_id);
 519         check_join_state(fsa_state, __func__);
 520 
 521     } else {
 522         crm_err("join-%d node history update (via CIB call %d) failed: %s "
 523                 "(next transition may determine resource status incorrectly)",
 524                 current_join_id, call_id, pcmk_strerror(rc));
 525         crm_log_xml_debug(msg, "failed");
 526         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 527     }
 528 }
 529 
 530 /*      A_DC_JOIN_PROCESS_ACK   */
 531 void
 532 do_dc_join_ack(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 533                enum crmd_fsa_cause cause,
 534                enum crmd_fsa_state cur_state,
 535                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 536 {
 537     int join_id = -1;
 538     int call_id = 0;
 539     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 540     enum controld_section_e section = controld_section_lrm;
 541 
 542     const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
 543     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
 544     crm_node_t *peer = NULL;
 545 
 546     // Sanity checks
 547     if (join_from == NULL) {
 548         crm_warn("Ignoring message received without node identification");
 549         return;
 550     }
 551     if (op == NULL) {
 552         crm_warn("Ignoring message received from %s without task", join_from);
 553         return;
 554     }
 555 
 556     if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
 557         crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
 558                   op, join_from, CRM_OP_JOIN_CONFIRM);
 559         return;
 560     }
 561 
 562     if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
 563         crm_warn("Ignoring join confirmation from %s without valid join ID",
 564                  join_from);
 565         return;
 566     }
 567 
 568     peer = crm_get_peer(0, join_from);
 569     if (peer->join != crm_join_finalized) {
 570         crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
 571                  "(currently %s not %s)",
 572                  join_id, join_from, crm_join_phase_str(peer->join),
 573                  crm_join_phase_str(crm_join_finalized));
 574         return;
 575     }
 576 
 577     if (join_id != current_join_id) {
 578         crm_err("Rejecting join-%d confirmation from %s "
 579                 "because currently on join-%d",
 580                 join_id, join_from, current_join_id);
 581         crm_update_peer_join(__func__, peer, crm_join_nack);
 582         return;
 583     }
 584 
 585     crm_update_peer_join(__func__, peer, crm_join_confirmed);
 586 
 587     /* Update CIB with node's current executor state. A new transition will be
 588      * triggered later, when the CIB notifies us of the change.
 589      */
 590     if (controld_shutdown_lock_enabled) {
 591         section = controld_section_lrm_unlocked;
 592     }
 593     controld_delete_node_state(join_from, section, cib_scope_local);
 594     if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) {
 595         xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname);
 596 
 597         if (now_dc_lrmd_state != NULL) {
 598             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
 599                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 600             free_xml(now_dc_lrmd_state);
 601             crm_debug("Updating local node history for join-%d "
 602                       "from query result (via CIB call %d)", join_id, call_id);
 603         } else {
 604             fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
 605                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 606             crm_warn("Updating local node history from join-%d confirmation "
 607                      "because query failed (via CIB call %d)", join_id, call_id);
 608         }
 609     } else {
 610         fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
 611            cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 612         crm_debug("Updating node history for %s from join-%d confirmation "
 613                   "(via CIB call %d)", join_from, join_id, call_id);
 614     }
 615     fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
 616 }
 617 
 618 void
 619 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 620 {
 621     xmlNode *acknak = NULL;
 622     xmlNode *tmp1 = NULL;
 623     crm_node_t *join_node = value;
 624     const char *join_to = join_node->uname;
 625 
 626     if(join_node->join != crm_join_integrated) {
 627         crm_trace("Not updating non-integrated node %s (%s) for join-%d",
 628                   join_to, crm_join_phase_str(join_node->join),
 629                   current_join_id);
 630         return;
 631     }
 632 
 633     crm_trace("Updating node state for %s", join_to);
 634     tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
 635     set_uuid(tmp1, XML_ATTR_UUID, join_node);
 636     crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
 637     fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
 638     free_xml(tmp1);
 639 
 640     join_node = crm_get_peer(0, join_to);
 641     if (crm_is_peer_active(join_node) == FALSE) {
 642         /*
 643          * NACK'ing nodes that the membership layer doesn't know about yet
 644          * simply creates more churn
 645          *
 646          * Better to leave them waiting and let the join restart when
 647          * the new membership event comes in
 648          *
 649          * All other NACKs (due to versions etc) should still be processed
 650          */
 651         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
 652         return;
 653     }
 654 
 655     // Acknowledge node's join request
 656     crm_debug("Acknowledging join-%d request from %s",
 657               current_join_id, join_to);
 658     acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
 659     crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
 660     crm_update_peer_join(__func__, join_node, crm_join_finalized);
 661     pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 662 
 663     send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE);
 664     free_xml(acknak);
 665     return;
 666 }
 667 
 668 gboolean
 669 check_join_state(enum crmd_fsa_state cur_state, const char *source)
     /* [previous][next][first][last][top][bottom][index][help] */
 670 {
 671     static unsigned long long highest_seq = 0;
 672 
 673     if (saved_ccm_membership_id != crm_peer_seq) {
 674         crm_debug("join-%d: Membership changed from %llu to %llu "
 675                   CRM_XS " highest=%llu state=%s for=%s",
 676                   current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq,
 677                   fsa_state2string(cur_state), source);
 678         if(highest_seq < crm_peer_seq) {
 679             /* Don't spam the FSA with duplicates */
 680             highest_seq = crm_peer_seq;
 681             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
 682         }
 683 
 684     } else if (cur_state == S_INTEGRATION) {
 685         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 686             int count = crmd_join_phase_count(crm_join_integrated);
 687 
 688             crm_debug("join-%d: Integration of %d peer%s complete "
 689                       CRM_XS " state=%s for=%s",
 690                       current_join_id, count, pcmk__plural_s(count),
 691                       fsa_state2string(cur_state), source);
 692             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
 693             return TRUE;
 694         }
 695 
 696     } else if (cur_state == S_FINALIZE_JOIN) {
 697         if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
 698             crm_debug("join-%d: Delaying finalization until we have CIB "
 699                       CRM_XS " state=%s for=%s",
 700                       current_join_id, fsa_state2string(cur_state), source);
 701             return TRUE;
 702 
 703         } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
 704             int count = crmd_join_phase_count(crm_join_welcomed);
 705 
 706             crm_debug("join-%d: Still waiting on %d welcomed node%s "
 707                       CRM_XS " state=%s for=%s",
 708                       current_join_id, count, pcmk__plural_s(count),
 709                       fsa_state2string(cur_state), source);
 710             crmd_join_phase_log(LOG_DEBUG);
 711 
 712         } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
 713             int count = crmd_join_phase_count(crm_join_integrated);
 714 
 715             crm_debug("join-%d: Still waiting on %d integrated node%s "
 716                       CRM_XS " state=%s for=%s",
 717                       current_join_id, count, pcmk__plural_s(count),
 718                       fsa_state2string(cur_state), source);
 719             crmd_join_phase_log(LOG_DEBUG);
 720 
 721         } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
 722             int count = crmd_join_phase_count(crm_join_finalized);
 723 
 724             crm_debug("join-%d: Still waiting on %d finalized node%s "
 725                       CRM_XS " state=%s for=%s",
 726                       current_join_id, count, pcmk__plural_s(count),
 727                       fsa_state2string(cur_state), source);
 728             crmd_join_phase_log(LOG_DEBUG);
 729 
 730         } else {
 731             crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
 732                       current_join_id, fsa_state2string(cur_state), source);
 733             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
 734             return TRUE;
 735         }
 736     }
 737 
 738     return FALSE;
 739 }
 740 
 741 void
 742 do_dc_join_final(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 743                  enum crmd_fsa_cause cause,
 744                  enum crmd_fsa_state cur_state,
 745                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 746 {
 747     crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
 748     crm_update_quorum(crm_have_quorum, TRUE);
 749 }
 750 
 751 int crmd_join_phase_count(enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
 752 {
 753     int count = 0;
 754     crm_node_t *peer;
 755     GHashTableIter iter;
 756 
 757     g_hash_table_iter_init(&iter, crm_peer_cache);
 758     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 759         if(peer->join == phase) {
 760             count++;
 761         }
 762     }
 763     return count;
 764 }
 765 
 766 void crmd_join_phase_log(int level)
     /* [previous][next][first][last][top][bottom][index][help] */
 767 {
 768     crm_node_t *peer;
 769     GHashTableIter iter;
 770 
 771     g_hash_table_iter_init(&iter, crm_peer_cache);
 772     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 773         do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
 774                    crm_join_phase_str(peer->join));
 775     }
 776 }

/* [previous][next][first][last][top][bottom][index][help] */