root/daemons/controld/controld_join_dc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. controld_destroy_failed_sync_table
  2. controld_remove_failed_sync_node
  3. record_failed_sync_node
  4. lookup_failed_sync_node
  5. crm_update_peer_join
  6. start_join_round
  7. create_dc_message
  8. join_make_offer
  9. do_dc_join_offer_all
  10. do_dc_join_offer_one
  11. compare_int_fields
  12. do_dc_join_filter_offer
  13. do_dc_join_finalize
  14. free_max_generation
  15. finalize_sync_callback
  16. join_node_state_commit_callback
  17. do_dc_join_ack
  18. finalize_join_for
  19. check_join_state
  20. do_dc_join_final
  21. crmd_join_phase_count
  22. crmd_join_phase_log

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <crm/crm.h>
  13 
  14 #include <crm/common/xml.h>
  15 #include <crm/cluster.h>
  16 
  17 #include <pacemaker-controld.h>
  18 
   /* Best CIB generation accepted so far in the current join round
    * (max_generation_xml) and the name of the node whose join request carried
    * it (max_generation_from). Both are updated while filtering join requests
    * and are released by start_join_round() and free_max_generation().
    */
   19 static char *max_generation_from = NULL;
   20 static xmlNodePtr max_generation_xml = NULL;
  21 
  22 /*!
  23  * \internal
  24  * \brief Nodes from which a CIB sync has failed since the peer joined
  25  *
  26  * This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
  27  * the name of a client node from which a CIB \p sync_from() call has failed in
  28  * \p do_dc_join_finalize() since the client joined the cluster as a peer.
  29  * \p join_id is the ID of the join round in which the \p sync_from() failed,
  30  * and is intended for use in nack log messages.
  31  */
  32 static GHashTable *failed_sync_nodes = NULL;
  33 
   /* Forward declarations for functions in this file that are referenced
    * before their definitions
    */
   34 void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
   35 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
   36 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
  37 
  38 /* Numeric counter used to identify join rounds (an unsigned int would be
  39  * appropriate, except we get and set it in XML as int)
  40  */
  41 static int current_join_id = 0;
  42 
  43 /*!
  44  * \internal
  45  * \brief Destroy the hash table containing failed sync nodes
  46  */
  47 void
  48 controld_destroy_failed_sync_table(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  49 {
  50     if (failed_sync_nodes != NULL) {
  51         g_hash_table_destroy(failed_sync_nodes);
  52         failed_sync_nodes = NULL;
  53     }
  54 }
  55 
  56 /*!
  57  * \internal
  58  * \brief Remove a node from the failed sync nodes table if present
  59  *
  60  * \param[in] node_name  Node name to remove
  61  */
  62 void
  63 controld_remove_failed_sync_node(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
  64 {
  65     if (failed_sync_nodes != NULL) {
  66         g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
  67     }
  68 }
  69 
  70 /*!
  71  * \internal
  72  * \brief Add to a hash table a node whose CIB failed to sync
  73  *
  74  * \param[in] node_name  Name of node whose CIB failed to sync
  75  * \param[in] join_id    Join round when the failure occurred
  76  */
  77 static void
  78 record_failed_sync_node(const char *node_name, gint join_id)
     /* [previous][next][first][last][top][bottom][index][help] */
  79 {
  80     if (failed_sync_nodes == NULL) {
  81         failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
  82     }
  83 
  84     /* If the node is already in the table then we failed to nack it during the
  85      * filter offer step
  86      */
  87     CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
  88                                        GINT_TO_POINTER(join_id)));
  89 }
  90 
  91 /*!
  92  * \internal
  93  * \brief Look up a node name in the failed sync table
  94  *
  95  * \param[in]  node_name  Name of node to look up
  96  * \param[out] join_id    Where to store the join ID of when the sync failed
  97  *
  98  * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
  99  *         node name was found, or \p pcmk_rc_node_unknown otherwise.
 100  * \note \p *join_id is set to -1 if the node is not found.
 101  */
 102 static int
 103 lookup_failed_sync_node(const char *node_name, gint *join_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 104 {
 105     *join_id = -1;
 106 
 107     if (failed_sync_nodes != NULL) {
 108         gpointer result = g_hash_table_lookup(failed_sync_nodes,
 109                                               (gchar *) node_name);
 110         if (result != NULL) {
 111             *join_id = GPOINTER_TO_INT(result);
 112             return pcmk_rc_ok;
 113         }
 114     }
 115     return pcmk_rc_node_unknown;
 116 }
 117 
 118 void
 119 crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
 120 {
 121     enum crm_join_phase last = 0;
 122 
 123     CRM_CHECK(node != NULL, return);
 124 
 125     /* Remote nodes do not participate in joins */
 126     if (pcmk_is_set(node->flags, crm_remote_node)) {
 127         return;
 128     }
 129 
 130     last = node->join;
 131 
 132     if(phase == last) {
 133         crm_trace("Node %s join-%d phase is still %s "
 134                   CRM_XS " nodeid=%u source=%s",
 135                   node->uname, current_join_id, crm_join_phase_str(last),
 136                   node->id, source);
 137 
 138     } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
 139         node->join = phase;
 140         crm_trace("Node %s join-%d phase is now %s (was %s) "
 141                   CRM_XS " nodeid=%u source=%s",
 142                  node->uname, current_join_id, crm_join_phase_str(phase),
 143                  crm_join_phase_str(last), node->id, source);
 144 
 145     } else {
 146         crm_warn("Rejecting join-%d phase update for node %s because "
 147                  "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
 148                  current_join_id, node->uname, crm_join_phase_str(last),
 149                  crm_join_phase_str(phase), node->id, source);
 150     }
 151 }
 152 
 153 static void
 154 start_join_round(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 155 {
 156     GHashTableIter iter;
 157     crm_node_t *peer = NULL;
 158 
 159     crm_debug("Starting new join round join-%d", current_join_id);
 160 
 161     g_hash_table_iter_init(&iter, crm_peer_cache);
 162     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
 163         crm_update_peer_join(__func__, peer, crm_join_none);
 164     }
 165     if (max_generation_from != NULL) {
 166         free(max_generation_from);
 167         max_generation_from = NULL;
 168     }
 169     if (max_generation_xml != NULL) {
 170         free_xml(max_generation_xml);
 171         max_generation_xml = NULL;
 172     }
 173     controld_clear_fsa_input_flags(R_HAVE_CIB);
 174 }
 175 
 176 /*!
 177  * \internal
 178  * \brief Create a join message from the DC
 179  *
 180  * \param[in] join_op  Join operation name
 181  * \param[in] host_to  Recipient of message
 182  */
 183 static xmlNode *
 184 create_dc_message(const char *join_op, const char *host_to)
     /* [previous][next][first][last][top][bottom][index][help] */
 185 {
 186     xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
 187                                   CRM_SYSTEM_DC, NULL);
 188 
 189     /* Identify which election this is a part of */
 190     crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
 191 
 192     /* Add a field specifying whether the DC is shutting down. This keeps the
 193      * joining node from fencing the old DC if it becomes the new DC.
 194      */
 195     pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
 196                            pcmk_is_set(controld_globals.fsa_input_register,
 197                                        R_SHUTDOWN));
 198     return msg;
 199 }
 200 
 201 static void
 202 join_make_offer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 203 {
 204     xmlNode *offer = NULL;
 205     crm_node_t *member = (crm_node_t *)value;
 206 
 207     pcmk__assert(member != NULL);
 208     if (!pcmk__cluster_is_node_active(member)) {
 209         crm_info("Not making join-%d offer to inactive node %s",
 210                  current_join_id,
 211                  (member->uname? member->uname : "with unknown name"));
 212         if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
 213             /* You would think this unsafe, but in fact this plus an
 214              * active resource is what causes it to be fenced.
 215              *
 216              * Yes, this does mean that any node that dies at the same
 217              * time as the old DC and is not running resource (still)
 218              * won't be fenced.
 219              *
 220              * I'm not happy about this either.
 221              */
 222             pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
 223         }
 224         return;
 225     }
 226 
 227     if (member->uname == NULL) {
 228         crm_info("Not making join-%d offer to node uuid %s with unknown name",
 229                  current_join_id, member->uuid);
 230         return;
 231     }
 232 
 233     if (controld_globals.membership_id != crm_peer_seq) {
 234         controld_globals.membership_id = crm_peer_seq;
 235         crm_info("Making join-%d offers based on membership event %llu",
 236                  current_join_id, crm_peer_seq);
 237     }
 238 
 239     if(user_data && member->join > crm_join_none) {
 240         crm_info("Not making join-%d offer to already known node %s (%s)",
 241                  current_join_id, member->uname,
 242                  crm_join_phase_str(member->join));
 243         return;
 244     }
 245 
 246     crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
 247 
 248     offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
 249 
 250     // Advertise our feature set so the joining node can bail if not compatible
 251     crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
 252 
 253     crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
 254     pcmk__cluster_send_message(member, crm_msg_crmd, offer);
 255     free_xml(offer);
 256 
 257     crm_update_peer_join(__func__, member, crm_join_welcomed);
 258 }
 259 
 260 /*       A_DC_JOIN_OFFER_ALL    */
 261 void
 262 do_dc_join_offer_all(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 263                      enum crmd_fsa_cause cause,
 264                      enum crmd_fsa_state cur_state,
 265                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 266 {
 267     int count;
 268 
 269     /* Reset everyone's status back to down or in_ccm in the CIB.
 270      * Any nodes that are active in the CIB but not in the cluster membership
 271      * will be seen as offline by the scheduler anyway.
 272      */
 273     current_join_id++;
 274     start_join_round();
 275 
 276     update_dc(NULL);
 277     if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
 278         crm_info("A new node joined the cluster");
 279     }
 280     g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
 281 
 282     count = crmd_join_phase_count(crm_join_welcomed);
 283     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 284              current_join_id, count, pcmk__plural_s(count));
 285 
 286     // Don't waste time by invoking the scheduler yet
 287 }
 288 
 289 /*       A_DC_JOIN_OFFER_ONE    */
 290 void
 291 do_dc_join_offer_one(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 292                      enum crmd_fsa_cause cause,
 293                      enum crmd_fsa_state cur_state,
 294                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 295 {
 296     crm_node_t *member;
 297     ha_msg_input_t *welcome = NULL;
 298     int count;
 299     const char *join_to = NULL;
 300 
 301     if (msg_data->data == NULL) {
 302         crm_info("Making join-%d offers to any unconfirmed nodes "
 303                  "because an unknown node joined", current_join_id);
 304         g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
 305         check_join_state(cur_state, __func__);
 306         return;
 307     }
 308 
 309     welcome = fsa_typed_data(fsa_dt_ha_msg);
 310     if (welcome == NULL) {
 311         // fsa_typed_data() already logged an error
 312         return;
 313     }
 314 
 315     join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
 316     if (join_to == NULL) {
 317         crm_err("Can't make join-%d offer to unknown node", current_join_id);
 318         return;
 319     }
 320     member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
 321 
 322     /* It is possible that a node will have been sick or starting up when the
 323      * original offer was made. However, it will either re-announce itself in
 324      * due course, or we can re-store the original offer on the client.
 325      */
 326 
 327     crm_update_peer_join(__func__, member, crm_join_none);
 328     join_make_offer(NULL, member, NULL);
 329 
 330     /* If the offer isn't to the local node, make an offer to the local node as
 331      * well, to ensure the correct value for max_generation_from.
 332      */
 333     if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
 334         member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
 335                                 pcmk__node_search_cluster_member);
 336         join_make_offer(NULL, member, NULL);
 337     }
 338 
 339     /* This was a genuine join request; cancel any existing transition and
 340      * invoke the scheduler.
 341      */
 342     abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
 343                      NULL);
 344 
 345     count = crmd_join_phase_count(crm_join_welcomed);
 346     crm_info("Waiting on join-%d requests from %d outstanding node%s",
 347              current_join_id, count, pcmk__plural_s(count));
 348 
 349     // Don't waste time by invoking the scheduler yet
 350 }
 351 
 352 static int
 353 compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
     /* [previous][next][first][last][top][bottom][index][help] */
 354 {
 355     const char *elem_l = crm_element_value(left, field);
 356     const char *elem_r = crm_element_value(right, field);
 357 
 358     long long int_elem_l;
 359     long long int_elem_r;
 360 
 361     int rc = pcmk_rc_ok;
 362 
 363     rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
 364     if (rc != pcmk_rc_ok) { // Shouldn't be possible
 365         crm_warn("Comparing current CIB %s as -1 "
 366                  "because '%s' is not an integer", field, elem_l);
 367     }
 368 
 369     rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
 370     if (rc != pcmk_rc_ok) { // Shouldn't be possible
 371         crm_warn("Comparing joining node's CIB %s as -1 "
 372                  "because '%s' is not an integer", field, elem_r);
 373     }
 374 
 375     if (int_elem_l < int_elem_r) {
 376         return -1;
 377 
 378     } else if (int_elem_l > int_elem_r) {
 379         return 1;
 380     }
 381 
 382     return 0;
 383 }
 384 
 385 /*       A_DC_JOIN_PROCESS_REQ  */
 386 void
 387 do_dc_join_filter_offer(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 388                         enum crmd_fsa_cause cause,
 389                         enum crmd_fsa_state cur_state,
 390                         enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 391 {
 392     xmlNode *generation = NULL;
 393 
 394     int cmp = 0;
 395     int join_id = -1;
 396     int count = 0;
 397     gint value = 0;
 398     gboolean ack_nack_bool = TRUE;
 399     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 400 
 401     const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
 402     const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
 403     const char *join_version = crm_element_value(join_ack->msg,
 404                                                  PCMK_XA_CRM_FEATURE_SET);
 405     crm_node_t *join_node = NULL;
 406 
 407     if (join_from == NULL) {
 408         crm_err("Ignoring invalid join request without node name");
 409         return;
 410     }
 411     join_node = pcmk__get_node(0, join_from, NULL,
 412                                pcmk__node_search_cluster_member);
 413 
 414     crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
 415     if (join_id != current_join_id) {
 416         crm_debug("Ignoring join-%d request from %s because we are on join-%d",
 417                   join_id, join_from, current_join_id);
 418         check_join_state(cur_state, __func__);
 419         return;
 420     }
 421 
 422     generation = join_ack->xml;
 423     if (max_generation_xml != NULL && generation != NULL) {
 424         int lpc = 0;
 425 
 426         const char *attributes[] = {
 427             PCMK_XA_ADMIN_EPOCH,
 428             PCMK_XA_EPOCH,
 429             PCMK_XA_NUM_UPDATES,
 430         };
 431 
 432         /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
 433          * element from the join client. The "if" guard is for clarity.
 434          */
 435         if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
 436             for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
 437                 cmp = compare_int_fields(max_generation_xml, generation,
 438                                          attributes[lpc]);
 439             }
 440 
 441         } else {    // Should always be PCMK__XE_GENERATION_TUPLE
 442             CRM_LOG_ASSERT(false);
 443         }
 444     }
 445 
 446     if (ref == NULL) {
 447         ref = "none"; // for logging only
 448     }
 449 
 450     if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
 451         crm_err("Rejecting join-%d request from node %s because we failed to "
 452                 "sync its CIB in join-%d " CRM_XS " ref=%s",
 453                 join_id, join_from, value, ref);
 454         ack_nack_bool = FALSE;
 455 
 456     } else if (!pcmk__cluster_is_node_active(join_node)) {
 457         if (match_down_event(join_from) != NULL) {
 458             /* The join request was received after the node was fenced or
 459              * otherwise shutdown in a way that we're aware of. No need to log
 460              * an error in this rare occurrence; we know the client was recently
 461              * shut down, and receiving a lingering in-flight request is not
 462              * cause for alarm.
 463              */
 464             crm_debug("Rejecting join-%d request from inactive node %s "
 465                       CRM_XS " ref=%s", join_id, join_from, ref);
 466         } else {
 467             crm_err("Rejecting join-%d request from inactive node %s "
 468                     CRM_XS " ref=%s", join_id, join_from, ref);
 469         }
 470         ack_nack_bool = FALSE;
 471 
 472     } else if (generation == NULL) {
 473         crm_err("Rejecting invalid join-%d request from node %s "
 474                 "missing CIB generation " CRM_XS " ref=%s",
 475                 join_id, join_from, ref);
 476         ack_nack_bool = FALSE;
 477 
 478     } else if ((join_version == NULL)
 479                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
 480         crm_err("Rejecting join-%d request from node %s because feature set %s"
 481                 " is incompatible with ours (%s) " CRM_XS " ref=%s",
 482                 join_id, join_from, (join_version? join_version : "pre-3.1.0"),
 483                 CRM_FEATURE_SET, ref);
 484         ack_nack_bool = FALSE;
 485 
 486     } else if (max_generation_xml == NULL) {
 487         const char *validation = crm_element_value(generation,
 488                                                    PCMK_XA_VALIDATE_WITH);
 489 
 490         if (pcmk__get_schema(validation) == NULL) {
 491             crm_err("Rejecting join-%d request from %s (with first CIB "
 492                     "generation) due to unknown schema version %s "
 493                     CRM_XS " ref=%s",
 494                     join_id, join_from, pcmk__s(validation, "(missing)"), ref);
 495             ack_nack_bool = FALSE;
 496 
 497         } else {
 498             crm_debug("Accepting join-%d request from %s (with first CIB "
 499                       "generation) " CRM_XS " ref=%s",
 500                       join_id, join_from, ref);
 501             max_generation_xml = pcmk__xml_copy(NULL, generation);
 502             pcmk__str_update(&max_generation_from, join_from);
 503         }
 504 
 505     } else if ((cmp < 0)
 506                || ((cmp == 0)
 507                    && pcmk__str_eq(join_from, controld_globals.our_nodename,
 508                                    pcmk__str_casei))) {
 509         const char *validation = crm_element_value(generation,
 510                                                    PCMK_XA_VALIDATE_WITH);
 511 
 512         if (pcmk__get_schema(validation) == NULL) {
 513             crm_err("Rejecting join-%d request from %s (with better CIB "
 514                     "generation than current best from %s) due to unknown "
 515                     "schema version %s " CRM_XS " ref=%s",
 516                     join_id, join_from, max_generation_from,
 517                     pcmk__s(validation, "(missing)"), ref);
 518             ack_nack_bool = FALSE;
 519 
 520         } else {
 521             crm_debug("Accepting join-%d request from %s (with better CIB "
 522                       "generation than current best from %s) " CRM_XS " ref=%s",
 523                       join_id, join_from, max_generation_from, ref);
 524             crm_log_xml_debug(max_generation_xml, "Old max generation");
 525             crm_log_xml_debug(generation, "New max generation");
 526 
 527             free_xml(max_generation_xml);
 528             max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
 529             pcmk__str_update(&max_generation_from, join_from);
 530         }
 531 
 532     } else {
 533         crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
 534                   join_id, join_from, ref);
 535     }
 536 
 537     if (!ack_nack_bool) {
 538         if (compare_version(join_version, "3.17.0") < 0) {
 539             /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
 540              * after a nack message, don't send one
 541              */
 542             crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
 543         } else {
 544             crm_update_peer_join(__func__, join_node, crm_join_nack);
 545         }
 546         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
 547 
 548     } else {
 549         crm_update_peer_join(__func__, join_node, crm_join_integrated);
 550         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 551     }
 552 
 553     count = crmd_join_phase_count(crm_join_integrated);
 554     crm_debug("%d node%s currently integrated in join-%d",
 555               count, pcmk__plural_s(count), join_id);
 556 
 557     if (check_join_state(cur_state, __func__) == FALSE) {
 558         // Don't waste time by invoking the scheduler yet
 559         count = crmd_join_phase_count(crm_join_welcomed);
 560         crm_debug("Waiting on join-%d requests from %d outstanding node%s",
 561                   join_id, count, pcmk__plural_s(count));
 562     }
 563 }
 564 
 565 /*      A_DC_JOIN_FINALIZE      */
 566 void
 567 do_dc_join_finalize(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 568                     enum crmd_fsa_cause cause,
 569                     enum crmd_fsa_state cur_state,
 570                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 571 {
 572     char *sync_from = NULL;
 573     int rc = pcmk_ok;
 574     int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
 575     int count_finalizable = crmd_join_phase_count(crm_join_integrated)
 576                             + crmd_join_phase_count(crm_join_nack)
 577                             + crmd_join_phase_count(crm_join_nack_quiet);
 578 
 579     /* This we can do straight away and avoid clients timing us out
 580      *  while we compute the latest CIB
 581      */
 582     if (count_welcomed != 0) {
 583         crm_debug("Waiting on join-%d requests from %d outstanding node%s "
 584                   "before finalizing join", current_join_id, count_welcomed,
 585                   pcmk__plural_s(count_welcomed));
 586         crmd_join_phase_log(LOG_DEBUG);
 587         /* crmd_fsa_stall(FALSE); Needed? */
 588         return;
 589 
 590     } else if (count_finalizable == 0) {
 591         crm_debug("Finalization not needed for join-%d at the current time",
 592                   current_join_id);
 593         crmd_join_phase_log(LOG_DEBUG);
 594         check_join_state(controld_globals.fsa_state, __func__);
 595         return;
 596     }
 597 
 598     controld_clear_fsa_input_flags(R_HAVE_CIB);
 599     if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
 600                      pcmk__str_null_matches|pcmk__str_casei)) {
 601         controld_set_fsa_input_flags(R_HAVE_CIB);
 602     }
 603 
 604     if (!controld_globals.transition_graph->complete) {
 605         crm_warn("Delaying join-%d finalization while transition in progress",
 606                  current_join_id);
 607         crmd_join_phase_log(LOG_DEBUG);
 608         crmd_fsa_stall(FALSE);
 609         return;
 610     }
 611 
 612     if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
 613         // Send our CIB out to everyone
 614         sync_from = pcmk__str_copy(controld_globals.our_nodename);
 615         crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
 616                   current_join_id, count_finalizable,
 617                   pcmk__plural_s(count_finalizable));
 618         crm_log_xml_debug(max_generation_xml, "Requested CIB version");
 619 
 620     } else {
 621         // Ask for the agreed best CIB
 622         sync_from = pcmk__str_copy(max_generation_from);
 623         crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
 624                    current_join_id, count_finalizable,
 625                    pcmk__plural_s(count_finalizable), sync_from);
 626         crm_log_xml_notice(max_generation_xml, "Requested CIB version");
 627     }
 628     crmd_join_phase_log(LOG_DEBUG);
 629 
 630     rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
 631                                                     sync_from, NULL, cib_none);
 632     fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
 633 }
 634 
 635 void
 636 free_max_generation(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 637 {
 638     free(max_generation_from);
 639     max_generation_from = NULL;
 640 
 641     free_xml(max_generation_xml);
 642     max_generation_xml = NULL;
 643 }
 644 
 645 void
 646 finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 647 {
 648     CRM_LOG_ASSERT(-EPERM != rc);
 649 
 650     if (rc != pcmk_ok) {
 651         const char *sync_from = (const char *) user_data;
 652 
 653         do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
 654                    "Could not sync CIB from %s in join-%d: %s",
 655                    sync_from, current_join_id, pcmk_strerror(rc));
 656 
 657         if (rc != -pcmk_err_old_data) {
 658             record_failed_sync_node(sync_from, current_join_id);
 659         }
 660 
 661         /* restart the whole join process */
 662         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
 663                                __func__);
 664 
 665     } else if (!AM_I_DC) {
 666         crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
 667 
 668     } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
 669         crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
 670                   "(%s)", current_join_id,
 671                   fsa_state2string(controld_globals.fsa_state));
 672 
 673     } else {
 674         controld_set_fsa_input_flags(R_HAVE_CIB);
 675 
 676         /* make sure dc_uuid is re-set to us */
 677         if (!check_join_state(controld_globals.fsa_state, __func__)) {
 678             int count_finalizable = 0;
 679 
 680             count_finalizable = crmd_join_phase_count(crm_join_integrated)
 681                                 + crmd_join_phase_count(crm_join_nack)
 682                                 + crmd_join_phase_count(crm_join_nack_quiet);
 683 
 684             crm_debug("Notifying %d node%s of join-%d results",
 685                       count_finalizable, pcmk__plural_s(count_finalizable),
 686                       current_join_id);
 687             g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
 688         }
 689     }
 690 }
 691 
 692 static void
 693 join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
     /* [previous][next][first][last][top][bottom][index][help] */
 694                                 xmlNode *output, void *user_data)
 695 {
 696     const char *node = user_data;
 697 
 698     if (rc != pcmk_ok) {
 699         fsa_data_t *msg_data = NULL;    // for register_fsa_error() macro
 700 
 701         crm_crit("join-%d node history update (via CIB call %d) for node %s "
 702                  "failed: %s",
 703                  current_join_id, call_id, node, pcmk_strerror(rc));
 704         crm_log_xml_debug(msg, "failed");
 705         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 706     }
 707 
 708     crm_debug("join-%d node history update (via CIB call %d) for node %s "
 709               "complete",
 710               current_join_id, call_id, node);
 711     check_join_state(controld_globals.fsa_state, __func__);
 712 }
 713 
 714 /*      A_DC_JOIN_PROCESS_ACK   */
 715 void
 716 do_dc_join_ack(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 717                enum crmd_fsa_cause cause,
 718                enum crmd_fsa_state cur_state,
 719                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 720 {
 721     int join_id = -1;
 722     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 723 
 724     const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
 725     char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
 726     crm_node_t *peer = NULL;
 727 
 728     enum controld_section_e section = controld_section_lrm;
 729     char *xpath = NULL;
 730     xmlNode *state = join_ack->xml;
 731     xmlNode *execd_state = NULL;
 732 
 733     cib_t *cib = controld_globals.cib_conn;
 734     int rc = pcmk_ok;
 735 
 736     // Sanity checks
 737     if (join_from == NULL) {
 738         crm_warn("Ignoring message received without node identification");
 739         goto done;
 740     }
 741     if (op == NULL) {
 742         crm_warn("Ignoring message received from %s without task", join_from);
 743         goto done;
 744     }
 745 
 746     if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
 747         crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
 748                   op, join_from, CRM_OP_JOIN_CONFIRM);
 749         goto done;
 750     }
 751 
 752     if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
 753         crm_warn("Ignoring join confirmation from %s without valid join ID",
 754                  join_from);
 755         goto done;
 756     }
 757 
 758     peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
 759     if (peer->join != crm_join_finalized) {
 760         crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
 761                  "(currently %s not %s)",
 762                  join_id, join_from, crm_join_phase_str(peer->join),
 763                  crm_join_phase_str(crm_join_finalized));
 764         goto done;
 765     }
 766 
 767     if (join_id != current_join_id) {
 768         crm_err("Rejecting join-%d confirmation from %s "
 769                 "because currently on join-%d",
 770                 join_id, join_from, current_join_id);
 771         crm_update_peer_join(__func__, peer, crm_join_nack);
 772         goto done;
 773     }
 774 
 775     crm_update_peer_join(__func__, peer, crm_join_confirmed);
 776 
 777     /* Update CIB with node's current executor state. A new transition will be
 778      * triggered later, when the CIB manager notifies us of the change.
 779      *
 780      * The delete and modify requests are part of an atomic transaction.
 781      */
 782     rc = cib->cmds->init_transaction(cib);
 783     if (rc != pcmk_ok) {
 784         goto done;
 785     }
 786 
 787     // Delete relevant parts of node's current executor state from CIB
 788     if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
 789         section = controld_section_lrm_unlocked;
 790     }
 791     controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
 792 
 793     rc = cib->cmds->remove(cib, xpath, NULL,
 794                            cib_scope_local
 795                            |cib_xpath
 796                            |cib_multiple
 797                            |cib_transaction);
 798     if (rc != pcmk_ok) {
 799         goto done;
 800     }
 801 
 802     // Update CIB with node's latest known executor state
 803     if (pcmk__str_eq(join_from, controld_globals.our_nodename,
 804                      pcmk__str_casei)) {
 805 
 806         // Use the latest possible state if processing our own join ack
 807         execd_state = controld_query_executor_state();
 808 
 809         if (execd_state != NULL) {
 810             crm_debug("Updating local node history for join-%d from query "
 811                       "result",
 812                       current_join_id);
 813             state = execd_state;
 814 
 815         } else {
 816             crm_warn("Updating local node history from join-%d confirmation "
 817                      "because query failed",
 818                      current_join_id);
 819         }
 820 
 821     } else {
 822         crm_debug("Updating node history for %s from join-%d confirmation",
 823                   join_from, current_join_id);
 824     }
 825 
 826     rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
 827                            cib_scope_local|cib_can_create|cib_transaction);
 828     free_xml(execd_state);
 829     if (rc != pcmk_ok) {
 830         goto done;
 831     }
 832 
 833     // Commit the transaction
 834     rc = cib->cmds->end_transaction(cib, true, cib_scope_local);
 835     fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
 836 
 837     if (rc > 0) {
 838         // join_from will be freed after callback
 839         join_from = NULL;
 840         rc = pcmk_ok;
 841     }
 842 
 843 done:
 844     if (rc != pcmk_ok) {
 845         crm_crit("join-%d node history update for node %s failed: %s",
 846                  current_join_id, join_from, pcmk_strerror(rc));
 847         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 848     }
 849     free(join_from);
 850     free(xpath);
 851 }
 852 
 853 void
 854 finalize_join_for(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 855 {
 856     xmlNode *acknak = NULL;
 857     xmlNode *tmp1 = NULL;
 858     crm_node_t *join_node = value;
 859     const char *join_to = join_node->uname;
 860     bool integrated = false;
 861 
 862     switch (join_node->join) {
 863         case crm_join_integrated:
 864             integrated = true;
 865             break;
 866         case crm_join_nack:
 867         case crm_join_nack_quiet:
 868             break;
 869         default:
 870             crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
 871                       "for join-%d", join_to,
 872                       crm_join_phase_str(join_node->join), current_join_id);
 873             return;
 874     }
 875 
 876     /* Update the <node> element with the node's name and UUID, in case they
 877      * weren't known before
 878      */
 879     crm_trace("Updating node name and UUID in CIB for %s", join_to);
 880     tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
 881     crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
 882     crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
 883     fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
 884     free_xml(tmp1);
 885 
 886     if (join_node->join == crm_join_nack_quiet) {
 887         crm_trace("Not sending nack message to node %s with feature set older "
 888                   "than 3.17.0", join_to);
 889         return;
 890     }
 891 
 892     join_node = pcmk__get_node(0, join_to, NULL,
 893                                pcmk__node_search_cluster_member);
 894     if (!pcmk__cluster_is_node_active(join_node)) {
 895         /*
 896          * NACK'ing nodes that the membership layer doesn't know about yet
 897          * simply creates more churn
 898          *
 899          * Better to leave them waiting and let the join restart when
 900          * the new membership event comes in
 901          *
 902          * All other NACKs (due to versions etc) should still be processed
 903          */
 904         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
 905         return;
 906     }
 907 
 908     // Acknowledge or nack node's join request
 909     crm_debug("%sing join-%d request from %s",
 910               integrated? "Acknowledg" : "Nack", current_join_id, join_to);
 911     acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
 912     pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
 913 
 914     if (integrated) {
 915         // No change needed for a nacked node
 916         crm_update_peer_join(__func__, join_node, crm_join_finalized);
 917         pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
 918 
 919         /* Iterate through the remote peer cache and add information on which
 920          * node hosts each to the ACK message.  This keeps new controllers in
 921          * sync with what has already happened.
 922          */
 923         if (pcmk__cluster_num_remote_nodes() > 0) {
 924             GHashTableIter iter;
 925             crm_node_t *node = NULL;
 926             xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
 927 
 928             g_hash_table_iter_init(&iter, crm_remote_peer_cache);
 929             while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 930                 xmlNode *remote = NULL;
 931 
 932                 if (!node->conn_host) {
 933                     continue;
 934                 }
 935 
 936                 remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
 937                 pcmk__xe_set_props(remote,
 938                                    PCMK_XA_ID, node->uname,
 939                                    PCMK__XA_NODE_STATE, node->state,
 940                                    PCMK__XA_CONNECTION_HOST, node->conn_host,
 941                                    NULL);
 942             }
 943         }
 944     }
 945     pcmk__cluster_send_message(join_node, crm_msg_crmd, acknak);
 946     free_xml(acknak);
 947     return;
 948 }
 949 
 950 gboolean
 951 check_join_state(enum crmd_fsa_state cur_state, const char *source)
     /* [previous][next][first][last][top][bottom][index][help] */
 952 {
 953     static unsigned long long highest_seq = 0;
 954 
 955     if (controld_globals.membership_id != crm_peer_seq) {
 956         crm_debug("join-%d: Membership changed from %llu to %llu "
 957                   CRM_XS " highest=%llu state=%s for=%s",
 958                   current_join_id, controld_globals.membership_id, crm_peer_seq,
 959                   highest_seq, fsa_state2string(cur_state), source);
 960         if(highest_seq < crm_peer_seq) {
 961             /* Don't spam the FSA with duplicates */
 962             highest_seq = crm_peer_seq;
 963             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
 964         }
 965 
 966     } else if (cur_state == S_INTEGRATION) {
 967         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 968             int count = crmd_join_phase_count(crm_join_integrated);
 969 
 970             crm_debug("join-%d: Integration of %d peer%s complete "
 971                       CRM_XS " state=%s for=%s",
 972                       current_join_id, count, pcmk__plural_s(count),
 973                       fsa_state2string(cur_state), source);
 974             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
 975             return TRUE;
 976         }
 977 
 978     } else if (cur_state == S_FINALIZE_JOIN) {
 979         if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
 980             crm_debug("join-%d: Delaying finalization until we have CIB "
 981                       CRM_XS " state=%s for=%s",
 982                       current_join_id, fsa_state2string(cur_state), source);
 983             return TRUE;
 984 
 985         } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
 986             int count = crmd_join_phase_count(crm_join_welcomed);
 987 
 988             crm_debug("join-%d: Still waiting on %d welcomed node%s "
 989                       CRM_XS " state=%s for=%s",
 990                       current_join_id, count, pcmk__plural_s(count),
 991                       fsa_state2string(cur_state), source);
 992             crmd_join_phase_log(LOG_DEBUG);
 993 
 994         } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
 995             int count = crmd_join_phase_count(crm_join_integrated);
 996 
 997             crm_debug("join-%d: Still waiting on %d integrated node%s "
 998                       CRM_XS " state=%s for=%s",
 999                       current_join_id, count, pcmk__plural_s(count),
1000                       fsa_state2string(cur_state), source);
1001             crmd_join_phase_log(LOG_DEBUG);
1002 
1003         } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
1004             int count = crmd_join_phase_count(crm_join_finalized);
1005 
1006             crm_debug("join-%d: Still waiting on %d finalized node%s "
1007                       CRM_XS " state=%s for=%s",
1008                       current_join_id, count, pcmk__plural_s(count),
1009                       fsa_state2string(cur_state), source);
1010             crmd_join_phase_log(LOG_DEBUG);
1011 
1012         } else {
1013             crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
1014                       current_join_id, fsa_state2string(cur_state), source);
1015             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
1016             return TRUE;
1017         }
1018     }
1019 
1020     return FALSE;
1021 }
1022 
1023 void
1024 do_dc_join_final(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
1025                  enum crmd_fsa_cause cause,
1026                  enum crmd_fsa_state cur_state,
1027                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1028 {
1029     crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
1030     crm_update_quorum(crm_have_quorum, TRUE);
1031 }
1032 
1033 int crmd_join_phase_count(enum crm_join_phase phase)
     /* [previous][next][first][last][top][bottom][index][help] */
1034 {
1035     int count = 0;
1036     crm_node_t *peer;
1037     GHashTableIter iter;
1038 
1039     g_hash_table_iter_init(&iter, crm_peer_cache);
1040     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1041         if(peer->join == phase) {
1042             count++;
1043         }
1044     }
1045     return count;
1046 }
1047 
1048 void crmd_join_phase_log(int level)
     /* [previous][next][first][last][top][bottom][index][help] */
1049 {
1050     crm_node_t *peer;
1051     GHashTableIter iter;
1052 
1053     g_hash_table_iter_init(&iter, crm_peer_cache);
1054     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
1055         do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
1056                    crm_join_phase_str(peer->join));
1057     }
1058 }

/* [previous][next][first][last][top][bottom][index][help] */