root/daemons/controld/controld_join_dc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. join_phase_text
  2. controld_destroy_failed_sync_table
  3. controld_remove_failed_sync_node
  4. record_failed_sync_node
  5. lookup_failed_sync_node
  6. crm_update_peer_join
  7. start_join_round
  8. create_dc_message
  9. join_make_offer
  10. do_dc_join_offer_all
  11. do_dc_join_offer_one
  12. compare_int_fields
  13. do_dc_join_filter_offer
  14. do_dc_join_finalize
  15. free_max_generation
  16. finalize_sync_callback
  17. join_node_state_commit_callback
  18. do_dc_join_ack
  19. finalize_join_for
  20. check_join_state
  21. do_dc_join_final
  22. crmd_join_phase_count
  23. crmd_join_phase_log

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <inttypes.h>               // PRIu32
  13 #include <stdbool.h>                // bool, true, false
  14 #include <stdio.h>                  // NULL
  15 #include <stdlib.h>                 // free(), etc.
  16 
  17 #include <glib.h>                   // gboolean, etc.
  18 #include <libxml/tree.h>            // xmlNode
  19 
  20 #include <crm/crm.h>
  21 
  22 #include <crm/common/xml.h>
  23 #include <crm/cluster.h>
  24 
  25 #include <pacemaker-controld.h>
  26 
  27 static char *max_generation_from = NULL;
  28 static xmlNodePtr max_generation_xml = NULL;
  29 
  30 /*!
  31  * \internal
  32  * \brief Nodes from which a CIB sync has failed since the peer joined
  33  *
  34  * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
  35  * the name of a client node from which a CIB \p sync_from() call has failed in
  36  * \p do_dc_join_finalize() since the client joined the cluster as a peer.
  37  * \p join_id is the ID of the join round in which the \p sync_from() failed,
  38  * and is intended for use in nack log messages.
  39  */
  40 static GHashTable *failed_sync_nodes = NULL;
  41 
  42 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
  43 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
  44 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
  45 
  46 /* Numeric counter used to identify join rounds (an unsigned int would be
  47  * appropriate, except we get and set it in XML as int)
  48  */
  49 static int current_join_id = 0;
  50 
  51 /*!
  52  * \internal
  53  * \brief Get log-friendly string equivalent of a controller group join phase
  54  *
  55  * \param[in] phase  Join phase
  56  *
  57  * \return Log-friendly string equivalent of \p phase
  58  */
  59 static const char *
  60 join_phase_text(enum controld_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
  61 {
  62     switch (phase) {
  63         case controld_join_nack:
  64             return "nack";
  65         case controld_join_none:
  66             return "none";
  67         case controld_join_welcomed:
  68             return "welcomed";
  69         case controld_join_integrated:
  70             return "integrated";
  71         case controld_join_finalized:
  72             return "finalized";
  73         case controld_join_confirmed:
  74             return "confirmed";
  75         default:
  76             return "invalid";
  77     }
  78 }
  79 
  80 /*!
  81  * \internal
  82  * \brief Destroy the hash table containing failed sync nodes
  83  */
  84 void
  85 controld_destroy_failed_sync_table(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  86 {
  87     if (failed_sync_nodes != NULL) {
  88         g_hash_table_destroy(failed_sync_nodes);
  89         failed_sync_nodes = NULL;
  90     }
  91 }
  92 
  93 /*!
  94  * \internal
  95  * \brief Remove a node from the failed sync nodes table if present
  96  *
  97  * \param[in] node_name  Node name to remove
  98  */
  99 void
 100 controld_remove_failed_sync_node(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
 101 {
 102     if (failed_sync_nodes != NULL) {
 103         g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
 104     }
 105 }
 106 
 107 /*!
 108  * \internal
 109  * \brief Add to a hash table a node whose CIB failed to sync
 110  *
 111  * \param[in] node_name  Name of node whose CIB failed to sync
 112  * \param[in] join_id    Join round when the failure occurred
 113  */
 114 static void
 115 record_failed_sync_node(const char *node_name, gint join_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 116 {
 117     if (failed_sync_nodes == NULL) {
 118         failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
 119     }
 120 
 121     /* If the node is already in the table then we failed to nack it during the
 122      * filter offer step
 123      */
 124     CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
 125                                        GINT_TO_POINTER(join_id)));
 126 }
 127 
 128 /*!
 129  * \internal
 130  * \brief Look up a node name in the failed sync table
 131  *
 132  * \param[in]  node_name  Name of node to look up
 133  * \param[out] join_id    Where to store the join ID of when the sync failed
 134  *
 135  * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
 136  *         node name was found, or \p pcmk_rc_node_unknown otherwise.
 137  * \note \p *join_id is set to -1 if the node is not found.
 138  */
 139 static int
 140 lookup_failed_sync_node(const char *node_name, gint *join_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142     *join_id = -1;
 143 
 144     if (failed_sync_nodes != NULL) {
 145         gpointer result = g_hash_table_lookup(failed_sync_nodes,
 146                                               (gchar *) node_name);
 147         if (result != NULL) {
 148             *join_id = GPOINTER_TO_INT(result);
 149             return pcmk_rc_ok;
 150         }
 151     }
 152     return pcmk_rc_node_unknown;
 153 }
 154 
 155 void
 156 crm_update_peer_join(const char *source, pcmk__node_status_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
 157                      enum controld_join_phase phase)
 158 {
 159     enum controld_join_phase last = controld_get_join_phase(node);
 160 
 161     CRM_CHECK(node != NULL, return);
 162 
 163     /* Remote nodes do not participate in joins */
 164     if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
 165         return;
 166     }
 167 
 168     if (phase == last) {
 169         crm_trace("Node %s join-%d phase is still %s "
 170                   QB_XS " nodeid=%" PRIu32 " source=%s",
 171                   node->name, current_join_id, join_phase_text(last),
 172                   node->cluster_layer_id, source);
 173         return;
 174     }
 175 
 176     if ((phase <= controld_join_none) || (phase == (last + 1))) {
 177         struct controld_node_status_data *data = NULL;
 178 
 179         if (node->user_data == NULL) {
 180             node->user_data =
 181                 pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
 182         }
 183         data = node->user_data;
 184         data->join_phase = phase;
 185 
 186         crm_trace("Node %s join-%d phase is now %s (was %s) "
 187                   QB_XS " nodeid=%" PRIu32 " source=%s",
 188                   node->name, current_join_id, join_phase_text(phase),
 189                   join_phase_text(last), node->cluster_layer_id,
 190                   source);
 191         return;
 192     }
 193 
 194     crm_warn("Rejecting join-%d phase update for node %s because can't go from "
 195              "%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
 196              current_join_id, node->name, join_phase_text(last),
 197              join_phase_text(phase), node->cluster_layer_id, source);
 198 }
 199 
 200 static void
 201 start_join_round(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 202 {
 203     GHashTableIter iter;
 204     pcmk__node_status_t *peer = NULL;
 205 
 206     crm_debug("Starting new join round join-%d", current_join_id);
 207 
 208     g_hash_table_iter_init(&iter, pcmk__peer_cache);
 209     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 210         crm_update_peer_join(__func__, peer, controld_join_none);
 211     }
 212     if (max_generation_from != NULL) {
 213         free(max_generation_from);
 214         max_generation_from = NULL;
 215     }
 216     if (max_generation_xml != NULL) {
 217         pcmk__xml_free(max_generation_xml);
 218         max_generation_xml = NULL;
 219     }
 220     controld_clear_fsa_input_flags(R_HAVE_CIB);
 221 }
 222 
 223 /*!
 224  * \internal
 225  * \brief Create a join message from the DC
 226  *
 227  * \param[in] join_op  Join operation name
 228  * \param[in] host_to  Recipient of message
 229  */
 230 static xmlNode *
 231 create_dc_message(const char *join_op, const char *host_to)
     /* [previous][next][first][last][top][bottom][index][help] */
 232 {
 233     xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
 234                                      CRM_SYSTEM_CRMD, join_op, NULL);
 235 
 236     /* Identify which election this is a part of */
 237     crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
 238 
 239     /* Add a field specifying whether the DC is shutting down. This keeps the
 240      * joining node from fencing the old DC if it becomes the new DC.
 241      */
 242     pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
 243                            pcmk_is_set(controld_globals.fsa_input_register,
 244                                        R_SHUTDOWN));
 245     return msg;
 246 }
 247 
 248 static void
 249 join_make_offer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 250 {
 251     /* @TODO We don't use user_data except to distinguish one particular call
 252      * from others. Make this clearer.
 253      */
 254     xmlNode *offer = NULL;
 255     pcmk__node_status_t *member = (pcmk__node_status_t *) value;
 256 
 257     pcmk__assert(member != NULL);
 258     if (!pcmk__cluster_is_node_active(member)) {
 259         crm_info("Not making join-%d offer to inactive node %s",
 260                  current_join_id, pcmk__s(member->name, "with unknown name"));
 261         if ((member->expected == NULL)
 262             && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
 263             /* You would think this unsafe, but in fact this plus an
 264              * active resource is what causes it to be fenced.
 265              *
 266              * Yes, this does mean that any node that dies at the same
 267              * time as the old DC and is not running resource (still)
 268              * won't be fenced.
 269              *
 270              * I'm not happy about this either.
 271              */
 272             pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
 273         }
 274         return;
 275     }
 276 
 277     if (member->name == NULL) {
 278         crm_info("Not making join-%d offer to node uuid %s with unknown name",
 279                  current_join_id, member->xml_id);
 280         return;
 281     }
 282 
 283     if (controld_globals.membership_id != controld_globals.peer_seq) {
 284         controld_globals.membership_id = controld_globals.peer_seq;
 285         crm_info("Making join-%d offers based on membership event %llu",
 286                  current_join_id, controld_globals.peer_seq);
 287     }
 288 
 289     if (user_data != NULL) {
 290         enum controld_join_phase phase = controld_get_join_phase(member);
 291 
 292         if (phase > controld_join_none) {
 293             crm_info("Not making join-%d offer to already known node %s (%s)",
 294                      current_join_id, member->name, join_phase_text(phase));
 295             return;
 296         }
 297     }
 298 
 299     crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
 300                          controld_join_none);
 301 
 302     offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
 303 
 304     // Advertise our feature set so the joining node can bail if not compatible
 305     crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
 306 
 307     crm_info("Sending join-%d offer to %s", current_join_id, member->name);
 308     pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
 309     pcmk__xml_free(offer);
 310 
 311     crm_update_peer_join(__func__, member, controld_join_welcomed);
 312 }
 313 
 314 /*       A_DC_JOIN_OFFER_ALL    */
 315 void
 316 do_dc_join_offer_all(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 317                      enum crmd_fsa_cause cause,
 318                      enum crmd_fsa_state cur_state,
 319                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 320 {
 321     int count;
 322 
 323     /* Reset everyone's status back to down or in_ccm in the CIB.
 324      * Any nodes that are active in the CIB but not in the cluster membership
 325      * will be seen as offline by the scheduler anyway.
 326      */
 327     current_join_id++;
 328     start_join_round();
 329 
 330     update_dc(NULL);
 331     if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
 332         crm_info("A new node joined the cluster");
 333     }
 334     g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
 335 
 336     count = crmd_join_phase_count(controld_join_welcomed);
 337     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 338              current_join_id, count, pcmk__plural_s(count));
 339 
 340     // Don't waste time by invoking the scheduler yet
 341 }
 342 
 343 /*       A_DC_JOIN_OFFER_ONE    */
 344 void
 345 do_dc_join_offer_one(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 346                      enum crmd_fsa_cause cause,
 347                      enum crmd_fsa_state cur_state,
 348                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 349 {
 350     pcmk__node_status_t *member = NULL;
 351     ha_msg_input_t *welcome = NULL;
 352     int count;
 353     const char *join_to = NULL;
 354 
 355     if (msg_data->data == NULL) {
 356         crm_info("Making join-%d offers to any unconfirmed nodes "
 357                  "because an unknown node joined", current_join_id);
 358         g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
 359         check_join_state(cur_state, __func__);
 360         return;
 361     }
 362 
 363     welcome = fsa_typed_data(fsa_dt_ha_msg);
 364     if (welcome == NULL) {
 365         // fsa_typed_data() already logged an error
 366         return;
 367     }
 368 
 369     join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
 370     if (join_to == NULL) {
 371         crm_err("Can't make join-%d offer to unknown node", current_join_id);
 372         return;
 373     }
 374     member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
 375 
 376     /* It is possible that a node will have been sick or starting up when the
 377      * original offer was made. However, it will either re-announce itself in
 378      * due course, or we can re-store the original offer on the client.
 379      */
 380 
 381     crm_update_peer_join(__func__, member, controld_join_none);
 382     join_make_offer(NULL, member, NULL);
 383 
 384     /* If the offer isn't to the local node, make an offer to the local node as
 385      * well, to ensure the correct value for max_generation_from.
 386      */
 387     if (!controld_is_local_node(join_to)) {
 388         member = controld_get_local_node_status();
 389         join_make_offer(NULL, member, NULL);
 390     }
 391 
 392     /* This was a genuine join request; cancel any existing transition and
 393      * invoke the scheduler.
 394      */
 395     abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
 396                      NULL);
 397 
 398     count = crmd_join_phase_count(controld_join_welcomed);
 399     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 400              current_join_id, count, pcmk__plural_s(count));
 401 
 402     // Don't waste time by invoking the scheduler yet
 403 }
 404 
 405 static int
 406 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
     /* [previous][next][first][last][top][bottom][index][help] */
 407 {
 408     const char *elem_l = crm_element_value(left, field);
 409     const char *elem_r = crm_element_value(right, field);
 410 
 411     long long int_elem_l;
 412     long long int_elem_r;
 413 
 414     int rc = pcmk_rc_ok;
 415 
 416     rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
 417     if (rc != pcmk_rc_ok) { // Shouldn't be possible
 418         crm_warn("Comparing current CIB %s as -1 "
 419                  "because '%s' is not an integer", field, elem_l);
 420     }
 421 
 422     rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
 423     if (rc != pcmk_rc_ok) { // Shouldn't be possible
 424         crm_warn("Comparing joining node's CIB %s as -1 "
 425                  "because '%s' is not an integer", field, elem_r);
 426     }
 427 
 428     if (int_elem_l < int_elem_r) {
 429         return -1;
 430 
 431     } else if (int_elem_l > int_elem_r) {
 432         return 1;
 433     }
 434 
 435     return 0;
 436 }
 437 
 438 /*       A_DC_JOIN_PROCESS_REQ  */
 439 void
 440 do_dc_join_filter_offer(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 441                         enum crmd_fsa_cause cause,
 442                         enum crmd_fsa_state cur_state,
 443                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 444 {
 445     xmlNode *generation = NULL;
 446 
 447     int cmp = 0;
 448     int join_id = -1;
 449     int count = 0;
 450     gint value = 0;
 451     gboolean ack_nack_bool = TRUE;
 452     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 453 
 454     const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
 455     const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
 456     const char *join_version = crm_element_value(join_ack->msg,
 457                                                  PCMK_XA_CRM_FEATURE_SET);
 458     pcmk__node_status_t *join_node = NULL;
 459 
 460     if (join_from == NULL) {
 461         crm_err("Ignoring invalid join request without node name");
 462         return;
 463     }
 464     join_node = pcmk__get_node(0, join_from, NULL,
 465                                pcmk__node_search_cluster_member);
 466 
 467     crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
 468     if (join_id != current_join_id) {
 469         crm_debug("Ignoring join-%d request from %s because we are on join-%d",
 470                   join_id, join_from, current_join_id);
 471         check_join_state(cur_state, __func__);
 472         return;
 473     }
 474 
 475     generation = join_ack->xml;
 476     if (max_generation_xml != NULL && generation != NULL) {
 477         int lpc = 0;
 478 
 479         const char *attributes[] = {
 480             PCMK_XA_ADMIN_EPOCH,
 481             PCMK_XA_EPOCH,
 482             PCMK_XA_NUM_UPDATES,
 483         };
 484 
 485         /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
 486          * element from the join client. The "if" guard is for clarity.
 487          */
 488         if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
 489             for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
 490                 cmp = compare_int_fields(max_generation_xml, generation,
 491                                          attributes[lpc]);
 492             }
 493 
 494         } else {    // Should always be PCMK__XE_GENERATION_TUPLE
 495             CRM_LOG_ASSERT(false);
 496         }
 497     }
 498 
 499     if (ref == NULL) {
 500         ref = "none"; // for logging only
 501     }
 502 
 503     if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
 504         crm_err("Rejecting join-%d request from node %s because we failed to "
 505                 "sync its CIB in join-%d " QB_XS " ref=%s",
 506                 join_id, join_from, value, ref);
 507         ack_nack_bool = FALSE;
 508 
 509     } else if (!pcmk__cluster_is_node_active(join_node)) {
 510         if (match_down_event(join_from) != NULL) {
 511             /* The join request was received after the node was fenced or
 512              * otherwise shutdown in a way that we're aware of. No need to log
 513              * an error in this rare occurrence; we know the client was recently
 514              * shut down, and receiving a lingering in-flight request is not
 515              * cause for alarm.
 516              */
 517             crm_debug("Rejecting join-%d request from inactive node %s "
 518                       QB_XS " ref=%s", join_id, join_from, ref);
 519         } else {
 520             crm_err("Rejecting join-%d request from inactive node %s "
 521                     QB_XS " ref=%s", join_id, join_from, ref);
 522         }
 523         ack_nack_bool = FALSE;
 524 
 525     } else if (generation == NULL) {
 526         crm_err("Rejecting invalid join-%d request from node %s "
 527                 "missing CIB generation " QB_XS " ref=%s",
 528                 join_id, join_from, ref);
 529         ack_nack_bool = FALSE;
 530 
 531     } else if ((join_version == NULL)
 532                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
 533         crm_err("Rejecting join-%d request from node %s because feature set %s"
 534                 " is incompatible with ours (%s) " QB_XS " ref=%s",
 535                 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
 536                 CRM_FEATURE_SET, ref);
 537         ack_nack_bool = FALSE;
 538 
 539     } else if (max_generation_xml == NULL) {
 540         const char *validation = crm_element_value(generation,
 541                                                    PCMK_XA_VALIDATE_WITH);
 542 
 543         if (pcmk__get_schema(validation) == NULL) {
 544             crm_err("Rejecting join-%d request from %s (with first CIB "
 545                     "generation) due to %s schema version %s " QB_XS " ref=%s",
 546                     join_id, join_from,
 547                     ((validation == NULL)? "missing" : "unknown"),
 548                     pcmk__s(validation, ""), ref);
 549             ack_nack_bool = FALSE;
 550 
 551         } else {
 552             crm_debug("Accepting join-%d request from %s (with first CIB "
 553                       "generation) " QB_XS " ref=%s",
 554                       join_id, join_from, ref);
 555             max_generation_xml = pcmk__xml_copy(NULL, generation);
 556             pcmk__str_update(&max_generation_from, join_from);
 557         }
 558 
 559     } else if ((cmp < 0)
 560                || ((cmp == 0) && controld_is_local_node(join_from))) {
 561         const char *validation = crm_element_value(generation,
 562                                                    PCMK_XA_VALIDATE_WITH);
 563 
 564         if (pcmk__get_schema(validation) == NULL) {
 565             crm_err("Rejecting join-%d request from %s (with better CIB "
 566                     "generation than current best from %s) due to %s "
 567                     "schema version %s " QB_XS " ref=%s",
 568                     join_id, join_from, max_generation_from,
 569                     ((validation == NULL)? "missing" : "unknown"),
 570                     pcmk__s(validation, ""), ref);
 571             ack_nack_bool = FALSE;
 572 
 573         } else {
 574             crm_debug("Accepting join-%d request from %s (with better CIB "
 575                       "generation than current best from %s) " QB_XS " ref=%s",
 576                       join_id, join_from, max_generation_from, ref);
 577             crm_log_xml_debug(max_generation_xml, "Old max generation");
 578             crm_log_xml_debug(generation, "New max generation");
 579 
 580             pcmk__xml_free(max_generation_xml);
 581             max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
 582             pcmk__str_update(&max_generation_from, join_from);
 583         }
 584 
 585     } else {
 586         crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
 587                   join_id, join_from, ref);
 588     }
 589 
 590     if (!ack_nack_bool) {
 591         crm_update_peer_join(__func__, join_node, controld_join_nack);
 592         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
 593 
 594     } else {
 595         crm_update_peer_join(__func__, join_node, controld_join_integrated);
 596         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 597     }
 598 
 599     count = crmd_join_phase_count(controld_join_integrated);
 600     crm_debug("%d node%s currently integrated in join-%d",
 601               count, pcmk__plural_s(count), join_id);
 602 
 603     if (check_join_state(cur_state, __func__) == FALSE) {
 604         // Don't waste time by invoking the scheduler yet
 605         count = crmd_join_phase_count(controld_join_welcomed);
 606         crm_debug("Waiting on join-%d requests from %d outstanding node%s",
 607                   join_id, count, pcmk__plural_s(count));
 608     }
 609 }
 610 
 611 /*      A_DC_JOIN_FINALIZE      */
 612 void
 613 do_dc_join_finalize(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 614                     enum crmd_fsa_cause cause,
 615                     enum crmd_fsa_state cur_state,
 616                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 617 {
 618     char *sync_from = NULL;
 619     int rc = pcmk_ok;
 620     int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
 621     int count_finalizable = crmd_join_phase_count(controld_join_integrated)
 622                             + crmd_join_phase_count(controld_join_nack);
 623 
 624     /* This we can do straight away and avoid clients timing us out
 625      *  while we compute the latest CIB
 626      */
 627     if (count_welcomed != 0) {
 628         crm_debug("Waiting on join-%d requests from %d outstanding node%s "
 629                   "before finalizing join", current_join_id, count_welcomed,
 630                   pcmk__plural_s(count_welcomed));
 631         crmd_join_phase_log(LOG_DEBUG);
 632         /* crmd_fsa_stall(FALSE); Needed? */
 633         return;
 634 
 635     } else if (count_finalizable == 0) {
 636         crm_debug("Finalization not needed for join-%d at the current time",
 637                   current_join_id);
 638         crmd_join_phase_log(LOG_DEBUG);
 639         check_join_state(controld_globals.fsa_state, __func__);
 640         return;
 641     }
 642 
 643     controld_clear_fsa_input_flags(R_HAVE_CIB);
 644     if ((max_generation_from == NULL)
 645         || controld_is_local_node(max_generation_from)) {
 646         controld_set_fsa_input_flags(R_HAVE_CIB);
 647     }
 648 
 649     if (!controld_globals.transition_graph->complete) {
 650         crm_warn("Delaying join-%d finalization while transition in progress",
 651                  current_join_id);
 652         crmd_join_phase_log(LOG_DEBUG);
 653         crmd_fsa_stall(FALSE);
 654         return;
 655     }
 656 
 657     if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
 658         // Send our CIB out to everyone
 659         sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
 660         crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
 661                   current_join_id, count_finalizable,
 662                   pcmk__plural_s(count_finalizable));
 663         crm_log_xml_debug(max_generation_xml, "Requested CIB version");
 664 
 665     } else {
 666         // Ask for the agreed best CIB
 667         sync_from = pcmk__str_copy(max_generation_from);
 668         crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
 669                    current_join_id, count_finalizable,
 670                    pcmk__plural_s(count_finalizable), sync_from);
 671         crm_log_xml_notice(max_generation_xml, "Requested CIB version");
 672     }
 673     crmd_join_phase_log(LOG_DEBUG);
 674 
 675     rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
 676                                                     sync_from, NULL, cib_none);
 677     fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
 678 }
 679 
 680 void
 681 free_max_generation(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 682 {
 683     free(max_generation_from);
 684     max_generation_from = NULL;
 685 
 686     pcmk__xml_free(max_generation_xml);
 687     max_generation_xml = NULL;
 688 }
 689 
 690 void
 691 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 692 {
 693     CRM_LOG_ASSERT(-EPERM != rc);
 694 
 695     if (rc != pcmk_ok) {
 696         const char *sync_from = (const char *) user_data;
 697 
 698         do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
 699                    "Could not sync CIB from %s in join-%d: %s",
 700                    sync_from, current_join_id, pcmk_strerror(rc));
 701 
 702         if (rc != -pcmk_err_old_data) {
 703             record_failed_sync_node(sync_from, current_join_id);
 704         }
 705 
 706         /* restart the whole join process */
 707         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
 708                                __func__);
 709 
 710     } else if (!AM_I_DC) {
 711         crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
 712 
 713     } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
 714         crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
 715                   "(%s)", current_join_id,
 716                   fsa_state2string(controld_globals.fsa_state));
 717 
 718     } else {
 719         controld_set_fsa_input_flags(R_HAVE_CIB);
 720 
 721         /* make sure dc_uuid is re-set to us */
 722         if (!check_join_state(controld_globals.fsa_state, __func__)) {
 723             int count_finalizable = 0;
 724 
 725             count_finalizable = crmd_join_phase_count(controld_join_integrated)
 726                                 + crmd_join_phase_count(controld_join_nack);
 727 
 728             crm_debug("Notifying %d node%s of join-%d results",
 729                       count_finalizable, pcmk__plural_s(count_finalizable),
 730                       current_join_id);
 731             g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
 732         }
 733     }
 734 }
 735 
 736 static void
 737 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
     /* [previous][next][first][last][top][bottom][index][help] */
 738                                 xmlNode *output, void *user_data)
 739 {
 740     const char *node = user_data;
 741 
 742     if (rc != pcmk_ok) {
 743         fsa_data_t *msg_data = NULL;    // for register_fsa_error() macro
 744 
 745         crm_crit("join-%d node history update (via CIB call %d) for node %s "
 746                  "failed: %s",
 747                  current_join_id, call_id, node, pcmk_strerror(rc));
 748         crm_log_xml_debug(msg, "failed");
 749         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 750     }
 751 
 752     crm_debug("join-%d node history update (via CIB call %d) for node %s "
 753               "complete",
 754               current_join_id, call_id, node);
 755     check_join_state(controld_globals.fsa_state, __func__);
 756 }
 757 
 758 /*      A_DC_JOIN_PROCESS_ACK   */
 759 void
 760 do_dc_join_ack(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 761                enum crmd_fsa_cause cause,
 762                enum crmd_fsa_state cur_state,
 763                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 764 {
 765     int join_id = -1;
 766     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 767 
 768     const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
 769     char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
 770     pcmk__node_status_t *peer = NULL;
 771     enum controld_join_phase phase = controld_join_none;
 772 
 773     enum controld_section_e section = controld_section_lrm;
 774     char *xpath = NULL;
 775     xmlNode *state = join_ack->xml;
 776     xmlNode *execd_state = NULL;
 777 
 778     cib_t *cib = controld_globals.cib_conn;
 779     int rc = pcmk_ok;
 780 
 781     // Sanity checks
 782     if (join_from == NULL) {
 783         crm_warn("Ignoring message received without node identification");
 784         goto done;
 785     }
 786     if (op == NULL) {
 787         crm_warn("Ignoring message received from %s without task", join_from);
 788         goto done;
 789     }
 790 
 791     if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
 792         crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
 793                   op, join_from, CRM_OP_JOIN_CONFIRM);
 794         goto done;
 795     }
 796 
 797     if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
 798         crm_warn("Ignoring join confirmation from %s without valid join ID",
 799                  join_from);
 800         goto done;
 801     }
 802 
 803     peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
 804     phase = controld_get_join_phase(peer);
 805     if (phase != controld_join_finalized) {
 806         crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
 807                  "(currently %s not %s)",
 808                  join_id, join_from, join_phase_text(phase),
 809                  join_phase_text(controld_join_finalized));
 810         goto done;
 811     }
 812 
 813     if (join_id != current_join_id) {
 814         crm_err("Rejecting join-%d confirmation from %s "
 815                 "because currently on join-%d",
 816                 join_id, join_from, current_join_id);
 817         crm_update_peer_join(__func__, peer, controld_join_nack);
 818         goto done;
 819     }
 820 
 821     crm_update_peer_join(__func__, peer, controld_join_confirmed);
 822 
 823     /* Update CIB with node's current executor state. A new transition will be
 824      * triggered later, when the CIB manager notifies us of the change.
 825      *
 826      * The delete and modify requests are part of an atomic transaction.
 827      */
 828     rc = cib->cmds->init_transaction(cib);
 829     if (rc != pcmk_ok) {
 830         goto done;
 831     }
 832 
 833     // Delete relevant parts of node's current executor state from CIB
 834     if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
 835         section = controld_section_lrm_unlocked;
 836     }
 837     controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
 838 
 839     rc = cib->cmds->remove(cib, xpath, NULL,
 840                            cib_xpath|cib_multiple|cib_transaction);
 841     if (rc != pcmk_ok) {
 842         goto done;
 843     }
 844 
 845     // Update CIB with node's latest known executor state
 846     if (controld_is_local_node(join_from)) {
 847 
 848         // Use the latest possible state if processing our own join ack
 849         execd_state = controld_query_executor_state();
 850 
 851         if (execd_state != NULL) {
 852             crm_debug("Updating local node history for join-%d from query "
 853                       "result",
 854                       current_join_id);
 855             state = execd_state;
 856 
 857         } else {
 858             crm_warn("Updating local node history from join-%d confirmation "
 859                      "because query failed",
 860                      current_join_id);
 861         }
 862 
 863     } else {
 864         crm_debug("Updating node history for %s from join-%d confirmation",
 865                   join_from, current_join_id);
 866     }
 867 
 868     rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
 869                            cib_can_create|cib_transaction);
 870     pcmk__xml_free(execd_state);
 871     if (rc != pcmk_ok) {
 872         goto done;
 873     }
 874 
 875     // Commit the transaction
 876     rc = cib->cmds->end_transaction(cib, true, cib_none);
 877     fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
 878 
 879     if (rc > 0) {
 880         // join_from will be freed after callback
 881         join_from = NULL;
 882         rc = pcmk_ok;
 883     }
 884 
 885 done:
 886     if (rc != pcmk_ok) {
 887         crm_crit("join-%d node history update for node %s failed: %s",
 888                  current_join_id, join_from, pcmk_strerror(rc));
 889         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 890     }
 891     free(join_from);
 892     free(xpath);
 893 }
 894 
 895 void
 896 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 897 {
 898     xmlNode *acknak = NULL;
 899     xmlNode *tmp1 = NULL;
 900     pcmk__node_status_t *join_node = value;
 901     const char *join_to = join_node->name;
 902     enum controld_join_phase phase = controld_get_join_phase(join_node);
 903     bool integrated = false;
 904 
 905     switch (phase) {
 906         case controld_join_integrated:
 907             integrated = true;
 908             break;
 909         case controld_join_nack:
 910             break;
 911         default:
 912             crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
 913                       "for join-%d",
 914                       join_to, join_phase_text(phase), current_join_id);
 915             return;
 916     }
 917 
 918     /* Update the <node> element with the node's name and UUID, in case they
 919      * weren't known before
 920      */
 921     crm_trace("Updating node name and UUID in CIB for %s", join_to);
 922     tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
 923     crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
 924     crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
 925     fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
 926     pcmk__xml_free(tmp1);
 927 
 928     join_node = pcmk__get_node(0, join_to, NULL,
 929                                pcmk__node_search_cluster_member);
 930     if (!pcmk__cluster_is_node_active(join_node)) {
 931         /*
 932          * NACK'ing nodes that the membership layer doesn't know about yet
 933          * simply creates more churn
 934          *
 935          * Better to leave them waiting and let the join restart when
 936          * the new membership event comes in
 937          *
 938          * All other NACKs (due to versions etc) should still be processed
 939          */
 940         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
 941         return;
 942     }
 943 
 944     // Acknowledge or nack node's join request
 945     crm_debug("%sing join-%d request from %s",
 946               integrated? "Acknowledg" : "Nack", current_join_id, join_to);
 947     acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
 948     pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
 949 
 950     if (integrated) {
 951         // No change needed for a nacked node
 952         crm_update_peer_join(__func__, join_node, controld_join_finalized);
 953         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 954 
 955         /* Iterate through the remote peer cache and add information on which
 956          * node hosts each to the ACK message.  This keeps new controllers in
 957          * sync with what has already happened.
 958          */
 959         if (pcmk__cluster_num_remote_nodes() > 0) {
 960             GHashTableIter iter;
 961             pcmk__node_status_t *node = NULL;
 962             xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
 963 
 964             g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
 965             while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 966                 xmlNode *remote = NULL;
 967 
 968                 if (!node->conn_host) {
 969                     continue;
 970                 }
 971 
 972                 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
 973                 pcmk__xe_set_props(remote,
 974                                    PCMK_XA_ID, node->name,
 975                                    PCMK__XA_NODE_STATE, node->state,
 976                                    PCMK__XA_CONNECTION_HOST, node->conn_host,
 977                                    NULL);
 978             }
 979         }
 980     }
 981     pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
 982     pcmk__xml_free(acknak);
 983     return;
 984 }
 985 
 986 gboolean
 987 check_join_state(enum crmd_fsa_state cur_state, const char *source)
     /* [previous][next][first][last][top][bottom][index][help] */
 988 {
 989     static unsigned long long highest_seq = 0;
 990 
 991     if (controld_globals.membership_id != controld_globals.peer_seq) {
 992         crm_debug("join-%d: Membership changed from %llu to %llu "
 993                   QB_XS " highest=%llu state=%s for=%s",
 994                   current_join_id, controld_globals.membership_id,
 995                   controld_globals.peer_seq, highest_seq,
 996                   fsa_state2string(cur_state), source);
 997         if (highest_seq < controld_globals.peer_seq) {
 998             /* Don't spam the FSA with duplicates */
 999             highest_seq = controld_globals.peer_seq;
1000             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
1001         }
1002 
1003     } else if (cur_state == S_INTEGRATION) {
1004         if (crmd_join_phase_count(controld_join_welcomed) == 0) {
1005             int count = crmd_join_phase_count(controld_join_integrated);
1006 
1007             crm_debug("join-%d: Integration of %d peer%s complete "
1008                       QB_XS " state=%s for=%s",
1009                       current_join_id, count, pcmk__plural_s(count),
1010                       fsa_state2string(cur_state), source);
1011             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
1012             return TRUE;
1013         }
1014 
1015     } else if (cur_state == S_FINALIZE_JOIN) {
1016         if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
1017             crm_debug("join-%d: Delaying finalization until we have CIB "
1018                       QB_XS " state=%s for=%s",
1019                       current_join_id, fsa_state2string(cur_state), source);
1020             return TRUE;
1021 
1022         } else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
1023             int count = crmd_join_phase_count(controld_join_welcomed);
1024 
1025             crm_debug("join-%d: Still waiting on %d welcomed node%s "
1026                       QB_XS " state=%s for=%s",
1027                       current_join_id, count, pcmk__plural_s(count),
1028                       fsa_state2string(cur_state), source);
1029             crmd_join_phase_log(LOG_DEBUG);
1030 
1031         } else if (crmd_join_phase_count(controld_join_integrated) != 0) {
1032             int count = crmd_join_phase_count(controld_join_integrated);
1033 
1034             crm_debug("join-%d: Still waiting on %d integrated node%s "
1035                       QB_XS " state=%s for=%s",
1036                       current_join_id, count, pcmk__plural_s(count),
1037                       fsa_state2string(cur_state), source);
1038             crmd_join_phase_log(LOG_DEBUG);
1039 
1040         } else if (crmd_join_phase_count(controld_join_finalized) != 0) {
1041             int count = crmd_join_phase_count(controld_join_finalized);
1042 
1043             crm_debug("join-%d: Still waiting on %d finalized node%s "
1044                       QB_XS " state=%s for=%s",
1045                       current_join_id, count, pcmk__plural_s(count),
1046                       fsa_state2string(cur_state), source);
1047             crmd_join_phase_log(LOG_DEBUG);
1048 
1049         } else {
1050             crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
1051                       current_join_id, fsa_state2string(cur_state), source);
1052             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
1053             return TRUE;
1054         }
1055     }
1056 
1057     return FALSE;
1058 }
1059 
1060 void
1061 do_dc_join_final(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
1062                  enum crmd_fsa_cause cause,
1063                  enum crmd_fsa_state cur_state,
1064                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1065 {
1066     crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1067     crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
1068 }
1069 
1070 int crmd_join_phase_count(enum controld_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
1071 {
1072     int count = 0;
1073     pcmk__node_status_t *peer;
1074     GHashTableIter iter;
1075 
1076     g_hash_table_iter_init(&iter, pcmk__peer_cache);
1077     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1078         if (controld_get_join_phase(peer) == phase) {
1079             count++;
1080         }
1081     }
1082     return count;
1083 }
1084 
1085 void crmd_join_phase_log(int level)
     /* [previous][next][first][last][top][bottom][index][help] */
1086 {
1087     pcmk__node_status_t *peer;
1088     GHashTableIter iter;
1089 
1090     g_hash_table_iter_init(&iter, pcmk__peer_cache);
1091     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1092         do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
1093                    join_phase_text(controld_get_join_phase(peer)));
1094     }
1095 }

/* [previous][next][first][last][top][bottom][index][help] */