root/daemons/controld/controld_join_dc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. crm_update_peer_join
  2. start_join_round
  3. create_dc_message
  4. join_make_offer
  5. do_dc_join_offer_all
  6. do_dc_join_offer_one
  7. compare_int_fields
  8. do_dc_join_filter_offer
  9. do_dc_join_finalize
  10. finalize_sync_callback
  11. join_update_complete_callback
  12. do_dc_join_ack
  13. finalize_join_for
  14. check_join_state
  15. do_dc_join_final
  16. crmd_join_phase_count
  17. crmd_join_phase_log

   1 /*
   2  * Copyright 2004-2020 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <crm/crm.h>
  13 
  14 #include <crm/msg_xml.h>
  15 #include <crm/common/xml.h>
  16 #include <crm/cluster.h>
  17 
  18 #include <pacemaker-controld.h>
  19 
  20 char *max_epoch = NULL;  /* NOTE(review): not referenced anywhere else in this file */
     /* Name of the node whose join request carried the best CIB generation seen
      * so far in the current join round (set in do_dc_join_filter_offer(),
      * freed and reset to NULL by start_join_round())
      */
  21 char *max_generation_from = NULL;
     /* Copy of the CIB generation XML that came with max_generation_from's
      * join request; later requests are compared against it
      */
  22 xmlNode *max_generation_xml = NULL;
  23 
     /* Forward declarations for functions that are used before their
      * definitions further down in this file
      */
  24 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
  25 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
  26 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
  27 
  28 /* Numeric counter used to identify join rounds (an unsigned int would be
  29  * appropriate, except we get and set it in XML as int)
  30  */
  31 static int current_join_id = 0;
  32 
     /* Value of crm_peer_seq at the time join offers were last made (updated in
      * join_make_offer()); check_join_state() compares it with the current
      * crm_peer_seq to detect a membership change
      */
  33 unsigned long long saved_ccm_membership_id = 0;
  34 
  35 void
  36 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
  37 {
  38     enum crm_join_phase last = 0;
  39 
  40     CRM_CHECK(node != NULL, return);
  41 
  42     /* Remote nodes do not participate in joins */
  43     if (pcmk_is_set(node->flags, crm_remote_node)) {
  44         return;
  45     }
  46 
  47     last = node->join;
  48 
  49     if(phase == last) {
  50         crm_trace("Node %s join-%d phase is still %s "
  51                   CRM_XS " nodeid=%u source=%s",
  52                   node->uname, current_join_id, crm_join_phase_str(last),
  53                   node->id, source);
  54 
  55     } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
  56         node->join = phase;
  57         crm_trace("Node %s join-%d phase is now %s (was %s) "
  58                   CRM_XS " nodeid=%u source=%s",
  59                  node->uname, current_join_id, crm_join_phase_str(phase),
  60                  crm_join_phase_str(last), node->id, source);
  61 
  62     } else {
  63         crm_warn("Rejecting join-%d phase update for node %s because "
  64                  "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
  65                  current_join_id, node->uname, crm_join_phase_str(last),
  66                  crm_join_phase_str(phase), node->id, source);
  67     }
  68 }
  69 
  70 static void
  71 start_join_round(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  72 {
  73     GHashTableIter iter;
  74     crm_node_t *peer = NULL;
  75 
  76     crm_debug("Starting new join round join-%d", current_join_id);
  77 
  78     g_hash_table_iter_init(&iter, crm_peer_cache);
  79     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
  80         crm_update_peer_join(__func__, peer, crm_join_none);
  81     }
  82     if (max_generation_from != NULL) {
  83         free(max_generation_from);
  84         max_generation_from = NULL;
  85     }
  86     if (max_generation_xml != NULL) {
  87         free_xml(max_generation_xml);
  88         max_generation_xml = NULL;
  89     }
  90     controld_clear_fsa_input_flags(R_HAVE_CIB|R_CIB_ASKED);
  91 }
  92 
  93 /*!
  94  * \internal
  95  * \brief Create a join message from the DC
  96  *
  97  * \param[in] join_op  Join operation name
  98  * \param[in] host_to  Recipient of message
  99  */
 100 static xmlNode *
 101 create_dc_message(const char *join_op, const char *host_to)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
 104                                   CRM_SYSTEM_DC, NULL);
 105 
 106     /* Identify which election this is a part of */
 107     crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id);
 108 
 109     /* Add a field specifying whether the DC is shutting down. This keeps the
 110      * joining node from fencing the old DC if it becomes the new DC.
 111      */
 112     crm_xml_add_boolean(msg, F_CRM_DC_LEAVING,
 113                         pcmk_is_set(fsa_input_register, R_SHUTDOWN));
 114     return msg;
 115 }
 116 
 117 static void
 118 join_make_offer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     xmlNode *offer = NULL;
 121     crm_node_t *member = (crm_node_t *)value;
 122 
 123     CRM_ASSERT(member != NULL);
 124     if (crm_is_peer_active(member) == FALSE) {
 125         crm_info("Not making join-%d offer to inactive node %s",
 126                  current_join_id,
 127                  (member->uname? member->uname : "with unknown name"));
 128         if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
 129             /* You would think this unsafe, but in fact this plus an
 130              * active resource is what causes it to be fenced.
 131              *
 132              * Yes, this does mean that any node that dies at the same
 133              * time as the old DC and is not running resource (still)
 134              * won't be fenced.
 135              *
 136              * I'm not happy about this either.
 137              */
 138             crm_update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
 139         }
 140         return;
 141     }
 142 
 143     if (member->uname == NULL) {
 144         crm_info("Not making join-%d offer to node uuid %s with unknown name",
 145                  current_join_id, member->uuid);
 146         return;
 147     }
 148 
 149     if (saved_ccm_membership_id != crm_peer_seq) {
 150         saved_ccm_membership_id = crm_peer_seq;
 151         crm_info("Making join-%d offers based on membership event %llu",
 152                  current_join_id, crm_peer_seq);
 153     }
 154 
 155     if(user_data && member->join > crm_join_none) {
 156         crm_info("Not making join-%d offer to already known node %s (%s)",
 157                  current_join_id, member->uname,
 158                  crm_join_phase_str(member->join));
 159         return;
 160     }
 161 
 162     crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
 163 
 164     offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
 165 
 166     // Advertise our feature set so the joining node can bail if not compatible
 167     crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
 168 
 169     crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
 170     send_cluster_message(member, crm_msg_crmd, offer, TRUE);
 171     free_xml(offer);
 172 
 173     crm_update_peer_join(__func__, member, crm_join_welcomed);
 174 }
 175 
 176 /*       A_DC_JOIN_OFFER_ALL    */
 177 void
 178 do_dc_join_offer_all(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 179                      enum crmd_fsa_cause cause,
 180                      enum crmd_fsa_state cur_state,
 181                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 182 {
 183     int count;
 184 
 185     /* Reset everyone's status back to down or in_ccm in the CIB.
 186      * Any nodes that are active in the CIB but not in the cluster membership
 187      * will be seen as offline by the scheduler anyway.
 188      */
 189     current_join_id++;
 190     start_join_round();
 191     // do_update_cib_nodes(TRUE, __func__);
 192 
 193     update_dc(NULL);
 194     if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
 195         crm_info("A new node joined the cluster");
 196     }
 197     g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
 198 
 199     count = crmd_join_phase_count(crm_join_welcomed);
 200     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 201              current_join_id, count, pcmk__plural_s(count));
 202 
 203     // Don't waste time by invoking the scheduler yet
 204 }
 205 
 206 /*       A_DC_JOIN_OFFER_ONE    */
 207 void
 208 do_dc_join_offer_one(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 209                      enum crmd_fsa_cause cause,
 210                      enum crmd_fsa_state cur_state,
 211                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 212 {
 213     crm_node_t *member;
 214     ha_msg_input_t *welcome = NULL;
 215     int count;
 216     const char *join_to = NULL;
 217 
 218     if (msg_data->data == NULL) {
 219         crm_info("Making join-%d offers to any unconfirmed nodes "
 220                  "because an unknown node joined", current_join_id);
 221         g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
 222         check_join_state(cur_state, __func__);
 223         return;
 224     }
 225 
 226     welcome = fsa_typed_data(fsa_dt_ha_msg);
 227     if (welcome == NULL) {
 228         // fsa_typed_data() already logged an error
 229         return;
 230     }
 231 
 232     join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
 233     if (join_to == NULL) {
 234         crm_err("Can't make join-%d offer to unknown node", current_join_id);
 235         return;
 236     }
 237     member = crm_get_peer(0, join_to);
 238 
 239     /* It is possible that a node will have been sick or starting up when the
 240      * original offer was made. However, it will either re-announce itself in
 241      * due course, or we can re-store the original offer on the client.
 242      */
 243 
 244     crm_update_peer_join(__func__, member, crm_join_none);
 245     join_make_offer(NULL, member, NULL);
 246 
 247     /* If the offer isn't to the local node, make an offer to the local node as
 248      * well, to ensure the correct value for max_generation_from.
 249      */
 250     if (strcmp(join_to, fsa_our_uname) != 0) {
 251         member = crm_get_peer(0, fsa_our_uname);
 252         join_make_offer(NULL, member, NULL);
 253     }
 254 
 255     /* This was a genuine join request; cancel any existing transition and
 256      * invoke the scheduler.
 257      */
 258     abort_transition(INFINITY, tg_restart, "Node join", NULL);
 259 
 260     count = crmd_join_phase_count(crm_join_welcomed);
 261     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 262              current_join_id, count, pcmk__plural_s(count));
 263 
 264     // Don't waste time by invoking the scheduler yet
 265 }
 266 
 267 static int
 268 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
     /* [previous][next][first][last][top][bottom][index][help] */
 269 {
 270     const char *elem_l = crm_element_value(left, field);
 271     const char *elem_r = crm_element_value(right, field);
 272 
 273     long long int_elem_l = elem_l? crm_parse_ll(elem_l, NULL) : -1;
 274     long long int_elem_r = elem_r? crm_parse_ll(elem_r, NULL) : -1;
 275 
 276     if (int_elem_l < int_elem_r) {
 277         return -1;
 278 
 279     } else if (int_elem_l > int_elem_r) {
 280         return 1;
 281     }
 282 
 283     return 0;
 284 }
 285 
 286 /*       A_DC_JOIN_PROCESS_REQ  */
 287 void
 288 do_dc_join_filter_offer(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 289                         enum crmd_fsa_cause cause,
 290                         enum crmd_fsa_state cur_state,
 291                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 292 {
 293     xmlNode *generation = NULL;
 294 
 295     int cmp = 0;
 296     int join_id = -1;
 297     int count = 0;
 298     gboolean ack_nack_bool = TRUE;
 299     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 300 
 301     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
 302     const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
 303     const char *join_version = crm_element_value(join_ack->msg,
 304                                                  XML_ATTR_CRM_VERSION);
 305     crm_node_t *join_node = NULL;
 306 
 307     if (join_from == NULL) {
 308         crm_err("Ignoring invalid join request without node name");
 309         return;
 310     }
 311     join_node = crm_get_peer(0, join_from);
 312 
 313     crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
 314     if (join_id != current_join_id) {
 315         crm_debug("Ignoring join-%d request from %s because we are on join-%d",
 316                   join_id, join_from, current_join_id);
 317         check_join_state(cur_state, __func__);
 318         return;
 319     }
 320 
 321     generation = join_ack->xml;
 322     if (max_generation_xml != NULL && generation != NULL) {
 323         int lpc = 0;
 324 
 325         const char *attributes[] = {
 326             XML_ATTR_GENERATION_ADMIN,
 327             XML_ATTR_GENERATION,
 328             XML_ATTR_NUMUPDATES,
 329         };
 330 
 331         for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) {
 332             cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]);
 333         }
 334     }
 335 
 336     if (ref == NULL) {
 337         ref = "none"; // for logging only
 338     }
 339 
 340     if (crm_is_peer_active(join_node) == FALSE) {
 341         crm_err("Rejecting join-%d request from inactive node %s "
 342                 CRM_XS " ref=%s", join_id, join_from, ref);
 343         ack_nack_bool = FALSE;
 344 
 345     } else if (generation == NULL) {
 346         crm_err("Rejecting invalid join-%d request from node %s "
 347                 "missing CIB generation " CRM_XS " ref=%s",
 348                 join_id, join_from, ref);
 349         ack_nack_bool = FALSE;
 350 
 351     } else if ((join_version == NULL)
 352                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
 353         crm_err("Rejecting join-%d request from node %s because feature set %s"
 354                 " is incompatible with ours (%s) " CRM_XS " ref=%s",
 355                 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
 356                 CRM_FEATURE_SET, ref);
 357         ack_nack_bool = FALSE;
 358 
 359     } else if (max_generation_xml == NULL) {
 360         crm_debug("Accepting join-%d request from %s "
 361                   "(with first CIB generation) " CRM_XS " ref=%s",
 362                   join_id, join_from, ref);
 363         max_generation_xml = copy_xml(generation);
 364         max_generation_from = strdup(join_from);
 365 
 366     } else if (cmp < 0 || (cmp == 0 && pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei))) {
 367         crm_debug("Accepting join-%d request from %s (with better "
 368                   "CIB generation than current best from %s) " CRM_XS " ref=%s",
 369                   join_id, join_from, max_generation_from, ref);
 370         crm_log_xml_debug(max_generation_xml, "Old max generation");
 371         crm_log_xml_debug(generation, "New max generation");
 372 
 373         free(max_generation_from);
 374         free_xml(max_generation_xml);
 375 
 376         max_generation_from = strdup(join_from);
 377         max_generation_xml = copy_xml(join_ack->xml);
 378 
 379     } else {
 380         crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
 381                   join_id, join_from, ref);
 382     }
 383 
 384     if (ack_nack_bool == FALSE) {
 385         crm_update_peer_join(__func__, join_node, crm_join_nack);
 386         crm_update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
 387     } else {
 388         crm_update_peer_join(__func__, join_node, crm_join_integrated);
 389         crm_update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 390     }
 391 
 392     count = crmd_join_phase_count(crm_join_integrated);
 393     crm_debug("%d node%s currently integrated in join-%d",
 394               count, pcmk__plural_s(count), join_id);
 395 
 396     if (check_join_state(cur_state, __func__) == FALSE) {
 397         // Don't waste time by invoking the scheduler yet
 398         count = crmd_join_phase_count(crm_join_welcomed);
 399         crm_debug("Waiting on join-%d requests from %d outstanding node%s",
 400                   join_id, count, pcmk__plural_s(count));
 401     }
 402 }
 403 
 404 /*      A_DC_JOIN_FINALIZE      */
 405 void
 406 do_dc_join_finalize(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 407                     enum crmd_fsa_cause cause,
 408                     enum crmd_fsa_state cur_state,
 409                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 410 {
 411     char *sync_from = NULL;
 412     int rc = pcmk_ok;
 413     int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
 414     int count_integrated = crmd_join_phase_count(crm_join_integrated);
 415 
 416     /* This we can do straight away and avoid clients timing us out
 417      *  while we compute the latest CIB
 418      */
 419     if (count_welcomed != 0) {
 420         crm_debug("Waiting on join-%d requests from %d outstanding node%s "
 421                   "before finalizing join", current_join_id, count_welcomed,
 422                   pcmk__plural_s(count_welcomed));
 423         crmd_join_phase_log(LOG_DEBUG);
 424         /* crmd_fsa_stall(FALSE); Needed? */
 425         return;
 426 
 427     } else if (count_integrated == 0) {
 428         crm_debug("Finalization not needed for join-%d at the current time",
 429                   current_join_id);
 430         crmd_join_phase_log(LOG_DEBUG);
 431         check_join_state(fsa_state, __func__);
 432         return;
 433     }
 434 
 435     controld_clear_fsa_input_flags(R_HAVE_CIB);
 436     if (pcmk__str_eq(max_generation_from, fsa_our_uname, pcmk__str_null_matches | pcmk__str_casei)) {
 437         controld_set_fsa_input_flags(R_HAVE_CIB);
 438     }
 439 
 440     if (pcmk_is_set(fsa_input_register, R_IN_TRANSITION)) {
 441         crm_warn("Delaying join-%d finalization while transition in progress",
 442                  current_join_id);
 443         crmd_join_phase_log(LOG_DEBUG);
 444         crmd_fsa_stall(FALSE);
 445         return;
 446     }
 447 
 448     if (max_generation_from && !pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
 449         /* ask for the agreed best CIB */
 450         sync_from = strdup(max_generation_from);
 451         controld_set_fsa_input_flags(R_CIB_ASKED);
 452         crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
 453                    current_join_id, count_integrated,
 454                    pcmk__plural_s(count_integrated), sync_from);
 455         crm_log_xml_notice(max_generation_xml, "Requested CIB version");
 456 
 457     } else {
 458         /* Send _our_ CIB out to everyone */
 459         sync_from = strdup(fsa_our_uname);
 460         crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
 461                   current_join_id, count_integrated,
 462                   pcmk__plural_s(count_integrated));
 463         crm_log_xml_debug(max_generation_xml, "Requested CIB version");
 464     }
 465     crmd_join_phase_log(LOG_DEBUG);
 466 
 467     rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
 468     fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
 469 }
 470 
 471 void
 472 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 473 {
 474     CRM_LOG_ASSERT(-EPERM != rc);
 475     controld_clear_fsa_input_flags(R_CIB_ASKED);
 476     if (rc != pcmk_ok) {
 477         do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
 478                    "Could not sync CIB from %s in join-%d: %s",
 479                    (char *) user_data, current_join_id, pcmk_strerror(rc));
 480 
 481         /* restart the whole join process */
 482         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
 483                                __func__);
 484 
 485     } else if (!AM_I_DC) {
 486         crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
 487 
 488     } else if (fsa_state != S_FINALIZE_JOIN) {
 489         crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)",
 490                   current_join_id, fsa_state2string(fsa_state));
 491 
 492     } else {
 493         controld_set_fsa_input_flags(R_HAVE_CIB);
 494         controld_clear_fsa_input_flags(R_CIB_ASKED);
 495 
 496         /* make sure dc_uuid is re-set to us */
 497         if (check_join_state(fsa_state, __func__) == FALSE) {
 498             int count_integrated = crmd_join_phase_count(crm_join_integrated);
 499 
 500             crm_debug("Notifying %d node%s of join-%d results",
 501                       count_integrated, pcmk__plural_s(count_integrated),
 502                       current_join_id);
 503             g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
 504         }
 505     }
 506 }
 507 
 508 static void
 509 join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 510 {
 511     fsa_data_t *msg_data = NULL;
 512 
 513     if (rc == pcmk_ok) {
 514         crm_debug("join-%d node history update (via CIB call %d) complete",
 515                   current_join_id, call_id);
 516         check_join_state(fsa_state, __func__);
 517 
 518     } else {
 519         crm_err("join-%d node history update (via CIB call %d) failed: %s "
 520                 "(next transition may determine resource status incorrectly)",
 521                 current_join_id, call_id, pcmk_strerror(rc));
 522         crm_log_xml_debug(msg, "failed");
 523         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 524     }
 525 }
 526 
 527 /*      A_DC_JOIN_PROCESS_ACK   */
 528 void
 529 do_dc_join_ack(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 530                enum crmd_fsa_cause cause,
 531                enum crmd_fsa_state cur_state,
 532                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 533 {
 534     int join_id = -1;
 535     int call_id = 0;
 536     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 537     enum controld_section_e section = controld_section_lrm;
 538 
 539     const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
 540     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
 541     crm_node_t *peer = NULL;
 542 
 543     // Sanity checks
 544     if (join_from == NULL) {
 545         crm_warn("Ignoring message received without node identification");
 546         return;
 547     }
 548     if (op == NULL) {
 549         crm_warn("Ignoring message received from %s without task", join_from);
 550         return;
 551     }
 552 
 553     if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
 554         crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
 555                   op, join_from, CRM_OP_JOIN_CONFIRM);
 556         return;
 557     }
 558 
 559     if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
 560         crm_warn("Ignoring join confirmation from %s without valid join ID",
 561                  join_from);
 562         return;
 563     }
 564 
 565     peer = crm_get_peer(0, join_from);
 566     if (peer->join != crm_join_finalized) {
 567         crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
 568                  "(currently %s not %s)",
 569                  join_id, join_from, crm_join_phase_str(peer->join),
 570                  crm_join_phase_str(crm_join_finalized));
 571         return;
 572     }
 573 
 574     if (join_id != current_join_id) {
 575         crm_err("Rejecting join-%d confirmation from %s "
 576                 "because currently on join-%d",
 577                 join_id, join_from, current_join_id);
 578         crm_update_peer_join(__func__, peer, crm_join_nack);
 579         return;
 580     }
 581 
 582     crm_update_peer_join(__func__, peer, crm_join_confirmed);
 583 
 584     /* Update CIB with node's current executor state. A new transition will be
 585      * triggered later, when the CIB notifies us of the change.
 586      */
 587     if (controld_shutdown_lock_enabled) {
 588         section = controld_section_lrm_unlocked;
 589     }
 590     controld_delete_node_state(join_from, section, cib_scope_local);
 591     if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) {
 592         xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname);
 593 
 594         if (now_dc_lrmd_state != NULL) {
 595             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
 596                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 597             free_xml(now_dc_lrmd_state);
 598             crm_debug("Updating local node history for join-%d "
 599                       "from query result (via CIB call %d)", join_id, call_id);
 600         } else {
 601             fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
 602                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 603             crm_warn("Updating local node history from join-%d confirmation "
 604                      "because query failed (via CIB call %d)", join_id, call_id);
 605         }
 606     } else {
 607         fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
 608            cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
 609         crm_debug("Updating node history for %s from join-%d confirmation "
 610                   "(via CIB call %d)", join_from, join_id, call_id);
 611     }
 612     fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
 613 }
 614 
 615 void
 616 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 617 {
 618     xmlNode *acknak = NULL;
 619     xmlNode *tmp1 = NULL;
 620     crm_node_t *join_node = value;
 621     const char *join_to = join_node->uname;
 622 
 623     if(join_node->join != crm_join_integrated) {
 624         crm_trace("Not updating non-integrated node %s (%s) for join-%d",
 625                   join_to, crm_join_phase_str(join_node->join),
 626                   current_join_id);
 627         return;
 628     }
 629 
 630     crm_trace("Updating node state for %s", join_to);
 631     tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
 632     set_uuid(tmp1, XML_ATTR_UUID, join_node);
 633     crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
 634     fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
 635     free_xml(tmp1);
 636 
 637     join_node = crm_get_peer(0, join_to);
 638     if (crm_is_peer_active(join_node) == FALSE) {
 639         /*
 640          * NACK'ing nodes that the membership layer doesn't know about yet
 641          * simply creates more churn
 642          *
 643          * Better to leave them waiting and let the join restart when
 644          * the new membership event comes in
 645          *
 646          * All other NACKs (due to versions etc) should still be processed
 647          */
 648         crm_update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
 649         return;
 650     }
 651 
 652     // Acknowledge node's join request
 653     crm_debug("Acknowledging join-%d request from %s",
 654               current_join_id, join_to);
 655     acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
 656     crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
 657     crm_update_peer_join(__func__, join_node, crm_join_finalized);
 658     crm_update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 659 
 660     send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE);
 661     free_xml(acknak);
 662     return;
 663 }
 664 
 665 gboolean
 666 check_join_state(enum crmd_fsa_state cur_state, const char *source)
     /* [previous][next][first][last][top][bottom][index][help] */
 667 {
 668     static unsigned long long highest_seq = 0;
 669 
 670     if (saved_ccm_membership_id != crm_peer_seq) {
 671         crm_debug("join-%d: Membership changed from %llu to %llu "
 672                   CRM_XS " highest=%llu state=%s for=%s",
 673                   current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq,
 674                   fsa_state2string(cur_state), source);
 675         if(highest_seq < crm_peer_seq) {
 676             /* Don't spam the FSA with duplicates */
 677             highest_seq = crm_peer_seq;
 678             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
 679         }
 680 
 681     } else if (cur_state == S_INTEGRATION) {
 682         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 683             int count = crmd_join_phase_count(crm_join_integrated);
 684 
 685             crm_debug("join-%d: Integration of %d peer%s complete "
 686                       CRM_XS " state=%s for=%s",
 687                       current_join_id, count, pcmk__plural_s(count),
 688                       fsa_state2string(cur_state), source);
 689             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
 690             return TRUE;
 691         }
 692 
 693     } else if (cur_state == S_FINALIZE_JOIN) {
 694         if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
 695             crm_debug("join-%d: Delaying finalization until we have CIB "
 696                       CRM_XS " state=%s for=%s",
 697                       current_join_id, fsa_state2string(cur_state), source);
 698             return TRUE;
 699 
 700         } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
 701             int count = crmd_join_phase_count(crm_join_welcomed);
 702 
 703             crm_debug("join-%d: Still waiting on %d welcomed node%s "
 704                       CRM_XS " state=%s for=%s",
 705                       current_join_id, count, pcmk__plural_s(count),
 706                       fsa_state2string(cur_state), source);
 707             crmd_join_phase_log(LOG_DEBUG);
 708 
 709         } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
 710             int count = crmd_join_phase_count(crm_join_integrated);
 711 
 712             crm_debug("join-%d: Still waiting on %d integrated node%s "
 713                       CRM_XS " state=%s for=%s",
 714                       current_join_id, count, pcmk__plural_s(count),
 715                       fsa_state2string(cur_state), source);
 716             crmd_join_phase_log(LOG_DEBUG);
 717 
 718         } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
 719             int count = crmd_join_phase_count(crm_join_finalized);
 720 
 721             crm_debug("join-%d: Still waiting on %d finalized node%s "
 722                       CRM_XS " state=%s for=%s",
 723                       current_join_id, count, pcmk__plural_s(count),
 724                       fsa_state2string(cur_state), source);
 725             crmd_join_phase_log(LOG_DEBUG);
 726 
 727         } else {
 728             crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
 729                       current_join_id, fsa_state2string(cur_state), source);
 730             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
 731             return TRUE;
 732         }
 733     }
 734 
 735     return FALSE;
 736 }
 737 
 738 void
 739 do_dc_join_final(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 740                  enum crmd_fsa_cause cause,
 741                  enum crmd_fsa_state cur_state,
 742                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 743 {
 744     crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
 745     crm_update_quorum(crm_have_quorum, TRUE);
 746 }
 747 
 748 int crmd_join_phase_count(enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
 749 {
 750     int count = 0;
 751     crm_node_t *peer;
 752     GHashTableIter iter;
 753 
 754     g_hash_table_iter_init(&iter, crm_peer_cache);
 755     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 756         if(peer->join == phase) {
 757             count++;
 758         }
 759     }
 760     return count;
 761 }
 762 
 763 void crmd_join_phase_log(int level)
     /* [previous][next][first][last][top][bottom][index][help] */
 764 {
 765     crm_node_t *peer;
 766     GHashTableIter iter;
 767 
 768     g_hash_table_iter_init(&iter, crm_peer_cache);
 769     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 770         do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
 771                    crm_join_phase_str(peer->join));
 772     }
 773 }

/* [previous][next][first][last][top][bottom][index][help] */