root/lib/cluster/membership.c

DEFINITIONS

This source file includes the following definitions.
  1. pcmk__cluster_has_quorum
  2. pcmk__cluster_set_quorum
  3. pcmk__cluster_num_remote_nodes
  4. pcmk__cluster_lookup_remote_node
  5. pcmk__cluster_forget_remote_node
  6. remote_state_from_cib
  7. remote_cache_refresh_helper
  8. mark_dirty
  9. is_dirty
  10. refresh_remote_nodes
  11. pcmk__cluster_is_node_active
  12. should_forget_cluster_node
  13. pcmk__cluster_forget_cluster_node
  14. count_peer
  15. pcmk__cluster_num_active_nodes
  16. destroy_crm_node
  17. pcmk__cluster_init_node_caches
  18. pcmk__cluster_destroy_node_caches
  19. pcmk__cluster_set_status_callback
  20. pcmk__cluster_set_autoreap
  21. dump_peer_hash
  22. hash_find_by_data
  23. search_cluster_member_cache
  24. pcmk__search_node_caches
  25. pcmk__purge_node_from_cache
  26. remove_conflicting_peer
  27. pcmk__get_node
  28. update_peer_uname
  29. proc2text
  30. crm_update_peer_proc
  31. pcmk__update_peer_expected
  32. update_peer_state_iter
  33. pcmk__update_peer_state
  34. pcmk__reap_unseen_nodes
  35. find_cib_cluster_node
  36. cluster_node_cib_cache_refresh_helper
  37. refresh_cluster_node_cib_cache
  38. pcmk__refresh_node_caches_from_cib
  39. crm_peer_init

/*
 * Copyright 2004-2025 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <inttypes.h>                   // PRIu32
#include <stdbool.h>                    // bool
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <sys/types.h>
#include <unistd.h>

#include <glib.h>

#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"

/* The peer cache remembers cluster nodes that have been seen. This is managed
 * mostly automatically by libcrmcluster, based on cluster membership events.
 *
 * Because cluster nodes can have conflicting names or UUIDs, the hash table key
 * is a uniquely generated ID.
 *
 * @TODO Move caches to pcmk_cluster_t
 */
GHashTable *pcmk__peer_cache = NULL;

/* The remote peer cache tracks pacemaker_remote nodes. While the
 * value has the same type as the peer cache's, it is tracked separately for
 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
 * so the name (which is also the UUID) is used as the hash table key; there
 * is no equivalent of membership events, so management is not automatic; and
 * most users of the peer cache need to exclude pacemaker_remote nodes.
 *
 * @TODO That said, using a single cache would be more logical and less
 * error-prone, so it would be a good idea to merge them one day.
 *
 * libcrmcluster provides two avenues for populating the cache:
 * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
 * directly manage it, while refresh_remote_nodes() populates it via the CIB.
 *
 * @TODO Move caches to pcmk_cluster_t
 */
GHashTable *pcmk__remote_peer_cache = NULL;

/*
 * The CIB cluster node cache tracks cluster nodes that have been seen in
 * the CIB. It is useful mainly when a caller needs to know about a node that
 * may no longer be in the membership, but doesn't want to add the node to the
 * main peer cache tables.
 */
static GHashTable *cluster_node_cib_cache = NULL;

static bool autoreap = true;
static bool has_quorum = false;

// Flag setting and clearing for pcmk__node_status_t:flags

#define set_peer_flags(peer, flags_to_set) do {                               \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                           "Peer", (peer)->name,              \
                                           (peer)->flags, (flags_to_set),     \
                                           #flags_to_set);                    \
    } while (0)

#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
                                             LOG_TRACE,                       \
                                             "Peer", (peer)->name,            \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)

static void update_peer_uname(pcmk__node_status_t *node, const char *uname);
static pcmk__node_status_t *find_cib_cluster_node(const char *id,
                                                  const char *uname);

/*!
 * \internal
 * \brief Check whether the cluster currently has quorum
 *
 * \return \c true if the cluster has quorum, or \c false otherwise
 */
bool
pcmk__cluster_has_quorum(void)
{
    return has_quorum;
}

/*!
 * \internal
 * \brief Set whether the cluster currently has quorum
 *
 * \param[in] quorate  \c true if the cluster has quorum, or \c false otherwise
 */
void
pcmk__cluster_set_quorum(bool quorate)
{
    has_quorum = quorate;
}

/*!
 * \internal
 * \brief Get the number of Pacemaker Remote nodes that have been seen
 *
 * \return Number of cached Pacemaker Remote nodes
 */
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
    if (pcmk__remote_peer_cache == NULL) {
        return 0U;
    }
    return g_hash_table_size(pcmk__remote_peer_cache);
}

/*!
 * \internal
 * \brief Get a remote node cache entry, creating it if necessary
 *
 * \param[in] node_name  Name of remote node
 *
 * \return Cache entry for node on success, or \c NULL (and set \c errno)
 *         otherwise
 *
 * \note When creating a new entry, this will leave the node state undetermined.
 *       The caller should also call \c pcmk__update_peer_state() if the state
 *       is known.
 * \note Because this can add and remove cache entries, callers should not
 *       assume any previously obtained cache entry pointers remain valid.
 */
pcmk__node_status_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
    pcmk__node_status_t *node = NULL;
    char *node_name_copy = NULL;

    if (node_name == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* It's theoretically possible that the node was added to the cluster peer
     * cache before it was known to be a Pacemaker Remote node. Remove that
     * entry unless it has an XML ID, which means the name actually is
     * associated with a cluster node. (@TODO return an error in that case?)
     */
    node = pcmk__search_node_caches(0, node_name, NULL,
                                    pcmk__node_search_cluster_member);
    if ((node != NULL)
        && ((node->xml_id == NULL)
            /* This assumes only Pacemaker Remote nodes have their XML ID the
             * same as their node name
             */
            || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) {

        /* node_name could be a pointer into the cache entry being removed, so
         * reassign it to a copy before the original gets freed
         */
        node_name_copy = strdup(node_name);
        if (node_name_copy == NULL) {
            errno = ENOMEM;
            return NULL;
        }
        node_name = node_name_copy;
        pcmk__cluster_forget_cluster_node(0, node_name);
    }

    /* Return existing cache entry if one exists */
    node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name);
    if (node) {
        free(node_name_copy);
        return node;
    }

    /* Allocate a new entry */
    node = calloc(1, sizeof(pcmk__node_status_t));
    if (node == NULL) {
        free(node_name_copy);
        return NULL;
    }

    /* Populate the essential information */
    set_peer_flags(node, pcmk__node_status_remote);
    node->xml_id = strdup(node_name);
    if (node->xml_id == NULL) {
        free(node);
        errno = ENOMEM;
        free(node_name_copy);
        return NULL;
    }

    /* Add the new entry to the cache */
    g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node);
    crm_trace("added %s to remote cache", node_name);

    /* Update the entry's uname, ensuring peer status callbacks are called */
    update_peer_uname(node, node_name);
    free(node_name_copy);
    return node;
}
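
/* Illustrative usage (hypothetical caller, not part of this file): create or
 * fetch the cache entry for a Pacemaker Remote node, then record its state if
 * known, as the \note above suggests:
 *
 * \code
 * pcmk__node_status_t *remote = pcmk__cluster_lookup_remote_node("remote1");
 *
 * if (remote == NULL) {
 *     crm_err("Could not cache remote node remote1: %s", strerror(errno));
 * } else {
 *     pcmk__update_peer_state(__func__, remote, PCMK_VALUE_MEMBER, 0);
 * }
 * \endcode
 */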

/*!
 * \internal
 * \brief Remove a node from the Pacemaker Remote node cache
 *
 * \param[in] node_name  Name of node to remove from cache
 *
 * \note The caller must be careful not to use \p node_name after calling this
 *       function if it might be a pointer into the cache entry being removed.
 */
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
    /* Do a lookup first, because node_name could be a pointer within the entry
     * being removed -- we can't log it *after* removing it.
     */
    if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) {
        crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
    }
}

/*!
 * \internal
 * \brief Return node status based on a CIB status entry
 *
 * \param[in] node_state  XML of node state
 *
 * \return \c PCMK_VALUE_MEMBER if \c PCMK__XA_IN_CCM is true in
 *         \c PCMK__XE_NODE_STATE, or \c PCMK__VALUE_LOST otherwise
 */
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
    bool in_ccm = false;

    if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
                                &in_ccm) == pcmk_rc_ok) && in_ccm) {
        return PCMK_VALUE_MEMBER;
    }
    return PCMK__VALUE_LOST;
}

/* user data for looping through remote node xpath searches */
struct refresh_data {
    const char *field;  /* XML attribute to check for node name */
    gboolean has_state; /* whether to update node state based on XML */
};

/*!
 * \internal
 * \brief Process one pacemaker_remote node xpath search result
 *
 * \param[in] result     XML search result
 * \param[in] user_data  what to look for in the XML
 */
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
    const struct refresh_data *data = user_data;
    const char *remote = crm_element_value(result, data->field);
    const char *state = NULL;
    pcmk__node_status_t *node;

    CRM_CHECK(remote != NULL, return);

    /* Determine node's state, if the result has it */
    if (data->has_state) {
        state = remote_state_from_cib(result);
    }

    /* Check whether cache already has entry for node */
    node = g_hash_table_lookup(pcmk__remote_peer_cache, remote);

    if (node == NULL) {
        /* Node is not in cache, so add a new entry for it */
        node = pcmk__cluster_lookup_remote_node(remote);
        pcmk__assert(node != NULL);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }

    } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, pcmk__node_status_dirty);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }
    }
}

static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
    set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty);
}

static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
    const pcmk__node_status_t *node = value;

    return pcmk_is_set(node->flags, pcmk__node_status_dirty);
}

/*!
 * \internal
 * \brief Repopulate the remote node cache based on CIB XML
 *
 * \param[in] cib  CIB XML to parse
 */
static void
refresh_remote_nodes(xmlNode *cib)
{
    struct refresh_data data;

    pcmk__cluster_init_node_caches();

    /* First, we mark all existing cache entries as dirty,
     * so that later we can remove any that weren't in the CIB.
     * We don't empty the cache, because we need to detect changes in state.
     */
    g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);

    /* Look for guest nodes and remote nodes in the status section */
    data.field = PCMK_XA_ID;
    data.has_state = TRUE;
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_STATUS,
                               remote_cache_refresh_helper, &data);

    /* Look for guest nodes and remote nodes in the configuration section,
     * because they may have just been added and not have a status entry yet.
     * In that case, the cached node state will be left NULL, so that the
     * peer status callback isn't called until we're sure the node started
     * successfully.
     */
    data.field = PCMK_XA_VALUE;
    data.has_state = FALSE;
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_GUEST_NODE_CONFIG,
                               remote_cache_refresh_helper, &data);
    data.field = PCMK_XA_ID;
    data.has_state = FALSE;
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_CONFIG,
                               remote_cache_refresh_helper, &data);

    /* Remove all old cache entries that weren't seen in the CIB */
    g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
}

/*!
 * \internal
 * \brief Check whether a node is an active cluster node
 *
 * Remote nodes are never considered active. This guarantees that they can never
 * become DC.
 *
 * \param[in] node  Node to check
 *
 * \return \c true if the node is an active cluster node, or \c false otherwise
 */
bool
pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
{
    const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();

    if ((node == NULL) || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return false;
    }

    switch (cluster_layer) {
        case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
            return pcmk__corosync_is_peer_active(node);
#else
            break;
#endif  // SUPPORT_COROSYNC
        default:
            break;
    }

    crm_err("Unhandled cluster layer: %s",
            pcmk_cluster_layer_text(cluster_layer));
    return false;
}

/*!
 * \internal
 * \brief Check if a node's entry should be removed from the cluster node cache
 *
 * A node should be removed from the cache if it's inactive and matches another
 * \c pcmk__node_status_t (the search object). The node is considered a
 * mismatch if any of the following are true:
 * * The search object is \c NULL.
 * * The search object has an ID set and the cached node's ID does not match it.
 * * The search object does not have an ID set, and the cached node's name does
 *   not match the search node's name. (If both names are \c NULL, it's a
 *   match.)
 *
 * Otherwise, the node is considered a match.
 *
 * Note that if the search object has both an ID and a name set, the name is
 * ignored for matching purposes.
 *
 * \param[in] key        Ignored
 * \param[in] value      \c pcmk__node_status_t object from cluster node cache
 * \param[in] user_data  \c pcmk__node_status_t object to match against (search
 *                       object)
 *
 * \return \c TRUE if the node entry should be removed from \c pcmk__peer_cache,
 *         or \c FALSE otherwise
 */
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__node_status_t *node = value;
    pcmk__node_status_t *search = user_data;

    if (search == NULL) {
        return FALSE;
    }
    if ((search->cluster_layer_id != 0)
        && (node->cluster_layer_id != search->cluster_layer_id)) {
        return FALSE;
    }
    if ((search->cluster_layer_id == 0)
        && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) {
        // @TODO Consider name even if ID is set?
        return FALSE;
    }
    if (pcmk__cluster_is_node_active(value)) {
        return FALSE;
    }

    crm_info("Removing node with name %s and cluster layer ID %" PRIu32
             " from membership cache",
             pcmk__s(node->name, "(unknown)"), node->cluster_layer_id);
    return TRUE;
}

/*!
 * \internal
 * \brief Remove one or more inactive nodes from the cluster node cache
 *
 * All inactive nodes matching \p id and \p node_name as described in
 * \c should_forget_cluster_node documentation are removed from the cache.
 *
 * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
 * from the cache regardless of ID and name. This differs from clearing the
 * cache, in that entries for active nodes are preserved.
 *
 * \param[in] id         ID of node to remove from cache (0 to ignore)
 * \param[in] node_name  Name of node to remove from cache (ignored if \p id is
 *                       nonzero)
 *
 * \note \p node_name is not modified directly, but it will be freed if it's a
 *       pointer into a cache entry that is removed.
 */
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
    pcmk__node_status_t search = { 0, };
    char *criterion = NULL; // For logging
    guint matches = 0;

    if (pcmk__peer_cache == NULL) {
        crm_trace("Membership cache not initialized, ignoring removal request");
        return;
    }

    search.cluster_layer_id = id;
    search.name = pcmk__str_copy(node_name);    // May log after original freed

    if (id > 0) {
        criterion = crm_strdup_printf("cluster layer ID %" PRIu32, id);

    } else if (node_name != NULL) {
        criterion = crm_strdup_printf("name %s", node_name);
    }

    matches = g_hash_table_foreach_remove(pcmk__peer_cache,
                                          should_forget_cluster_node, &search);
    if (matches > 0) {
        if (criterion != NULL) {
            crm_notice("Removed %u inactive node%s with %s from the membership "
                       "cache",
                       matches, pcmk__plural_s(matches), criterion);
        } else {
            crm_notice("Removed all (%u) inactive cluster nodes from the "
                       "membership cache",
                       matches);
        }

    } else {
        crm_info("No inactive cluster nodes%s%s to remove from the membership "
                 "cache",
                 ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
    }

    free(search.name);
    free(criterion);
}
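
/* A minimal sketch (hypothetical caller): because an ID of 0 and a NULL name
 * match every entry, all inactive cluster nodes can be dropped from the cache
 * in one call, while entries for active nodes are preserved:
 *
 * \code
 * pcmk__cluster_forget_cluster_node(0, NULL);
 * \endcode
 */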

static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
    unsigned int *count = user_data;
    pcmk__node_status_t *node = value;

    if (pcmk__cluster_is_node_active(node)) {
        *count = *count + 1;
    }
}

/*!
 * \internal
 * \brief Get the number of active cluster nodes that have been seen
 *
 * Remote nodes are never considered active. This guarantees that they can never
 * become DC.
 *
 * \return Number of active nodes in the cluster node cache
 */
unsigned int
pcmk__cluster_num_active_nodes(void)
{
    unsigned int count = 0;

    if (pcmk__peer_cache != NULL) {
        g_hash_table_foreach(pcmk__peer_cache, count_peer, &count);
    }
    return count;
}

static void
destroy_crm_node(gpointer data)
{
    pcmk__node_status_t *node = data;

    crm_trace("Destroying entry for node %" PRIu32 ": %s",
              node->cluster_layer_id, node->name);

    free(node->name);
    free(node->state);
    free(node->xml_id);
    free(node->user_data);
    free(node->expected);
    free(node->conn_host);
    free(node);
}

/*!
 * \internal
 * \brief Initialize node caches
 */
void
pcmk__cluster_init_node_caches(void)
{
    if (pcmk__peer_cache == NULL) {
        pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node);
    }

    if (pcmk__remote_peer_cache == NULL) {
        pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
    }

    if (cluster_node_cib_cache == NULL) {
        cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
    }
}

/*!
 * \internal
 * \brief Destroy node caches
 */
void
pcmk__cluster_destroy_node_caches(void)
{
    if (pcmk__peer_cache != NULL) {
        crm_trace("Destroying peer cache with %d members",
                  g_hash_table_size(pcmk__peer_cache));
        g_hash_table_destroy(pcmk__peer_cache);
        pcmk__peer_cache = NULL;
    }

    if (pcmk__remote_peer_cache != NULL) {
        crm_trace("Destroying remote peer cache with %d members",
                  pcmk__cluster_num_remote_nodes());
        g_hash_table_destroy(pcmk__remote_peer_cache);
        pcmk__remote_peer_cache = NULL;
    }

    if (cluster_node_cib_cache != NULL) {
        crm_trace("Destroying configured cluster node cache with %d members",
                  g_hash_table_size(cluster_node_cib_cache));
        g_hash_table_destroy(cluster_node_cib_cache);
        cluster_node_cib_cache = NULL;
    }
}

static void (*peer_status_callback)(enum pcmk__node_update,
                                    pcmk__node_status_t *,
                                    const void *) = NULL;

/*!
 * \internal
 * \brief Set a client function that will be called after peer status changes
 *
 * \param[in] dispatch  Pointer to function to use as callback
 *
 * \note Client callbacks should do only client-specific handling. Callbacks
 *       must not add or remove entries in the peer caches.
 */
void
pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
                                                   pcmk__node_status_t *,
                                                   const void *))
{
    // @TODO Improve documentation of peer_status_callback
    peer_status_callback = dispatch;
}
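
/* Illustrative registration (hypothetical function name, not part of this
 * file): the callback receives the update type, the affected node, and
 * update-specific data, and must not touch the peer caches themselves:
 *
 * \code
 * static void
 * my_status_cb(enum pcmk__node_update type, pcmk__node_status_t *node,
 *              const void *data)
 * {
 *     crm_info("Peer %s updated (type %d)",
 *              pcmk__s(node->name, "unknown"), (int) type);
 * }
 *
 * pcmk__cluster_set_status_callback(&my_status_cb);
 * \endcode
 */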

/*!
 * \internal
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * If \c true (the default), calling \c crm_update_peer_proc() will also update
 * the peer state to \c PCMK_VALUE_MEMBER or \c PCMK__VALUE_LOST, and updating
 * the peer state will reap peers whose state changes to anything other than
 * \c PCMK_VALUE_MEMBER.
 *
 * Callers should leave this enabled unless they plan to manage the cache
 * separately on their own.
 *
 * \param[in] enable  \c true to enable automatic reaping, \c false to disable
 */
void
pcmk__cluster_set_autoreap(bool enable)
{
    autoreap = enable;
}
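
/* A minimal sketch (hypothetical daemon setup): a caller that wants to keep
 * cache entries for lost nodes and reap them on its own schedule would disable
 * autoreaping before processing membership events:
 *
 * \code
 * pcmk__cluster_init_node_caches();
 * pcmk__cluster_set_autoreap(false);   // reap manually later, e.g. via
 *                                      // pcmk__cluster_forget_cluster_node()
 * \endcode
 */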

static void
dump_peer_hash(int level, const char *caller)
{
    GHashTableIter iter;
    const char *id = NULL;
    pcmk__node_status_t *node = NULL;

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
        do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s",
                   caller, node->cluster_layer_id, node->name, node, id);
    }
}

static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
    return value == user_data;
}

/*!
 * \internal
 * \brief Search cluster member node cache
 *
 * \param[in] id     If not 0, cluster node ID to search for
 * \param[in] uname  If not NULL, node name to search for
 * \param[in] uuid   If not NULL while id is 0, node UUID instead of cluster
 *                   node ID to search for
 *
 * \return Cluster node cache entry if found, otherwise NULL
 */
static pcmk__node_status_t *
search_cluster_member_cache(unsigned int id, const char *uname,
                            const char *uuid)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    if (uname != NULL) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                crm_trace("Name match: %s", node->name);
                by_name = node;
                break;
            }
        }
    }

    if (id > 0) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->cluster_layer_id == id) {
                crm_trace("ID match: %" PRIu32, node->cluster_layer_id);
                by_id = node;
                break;
            }
        }

    } else if (uuid != NULL) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            const char *this_xml_id = pcmk__cluster_get_xml_id(node);

            if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
                crm_trace("Found cluster node cache entry by XML ID %s",
                          this_xml_id);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if ((by_id == NULL) && (by_name != NULL)) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if (id && by_name->cluster_layer_id) {
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Nodes %u and %" PRIu32 " share the same name '%s'",
                     id, by_name->cluster_layer_id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if ((by_name == NULL) && (by_id != NULL)) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if ((uname != NULL) && (by_id->name != NULL)) {
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Nodes '%s' and '%s' share the same cluster nodeid %u: "
                     "assuming '%s' is correct",
                     uname, by_id->name, id, uname);
        }

    } else if ((uname != NULL) && (by_id->name != NULL)) {
        if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
            crm_notice("Node '%s' has changed its cluster layer ID "
                       "from %" PRIu32 " to %" PRIu32,
                       by_id->name, by_name->cluster_layer_id,
                       by_id->cluster_layer_id);
            g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
                                        by_name);

        } else {
            crm_warn("Nodes '%s' and '%s' share the same cluster nodeid: %u %s",
                     by_id->name, by_name->name, id, uname);
            dump_peer_hash(LOG_INFO, __func__);
            crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
                      TRUE);
        }

    } else if ((id > 0) && (by_name->cluster_layer_id > 0)) {
        crm_warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: '%s'",
                 by_id->cluster_layer_id, by_name->cluster_layer_id, uname);

    } else {
        /* Simple merge */

        /* Only corosync-based clusters use node IDs. The functions that call
         * pcmk__update_peer_state() and crm_update_peer_proc() only know
         * nodeid, so 'by_id' is authoritative when merging.
         */
        dump_peer_hash(LOG_DEBUG, __func__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
                                    by_name);
    }

    return node;
}

/*!
 * \internal
 * \brief Search caches for a node (cluster or Pacemaker Remote)
 *
 * \param[in] id      If not 0, cluster node ID to search for
 * \param[in] uname   If not NULL, node name to search for
 * \param[in] xml_id  If not NULL, CIB XML ID of node to search for
 * \param[in] flags   Group of enum pcmk__node_search_flags
 *
 * \return Node cache entry if found, otherwise NULL
 */
pcmk__node_status_t *
pcmk__search_node_caches(unsigned int id, const char *uname,
                         const char *xml_id, uint32_t flags)
{
    pcmk__node_status_t *node = NULL;

    pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL));

    pcmk__cluster_init_node_caches();

    if (pcmk_is_set(flags, pcmk__node_search_remote)) {
        if (uname != NULL) {
            node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
        } else if (xml_id != NULL) {
            node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id);
        }
    }

    if ((node == NULL)
        && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {

        node = search_cluster_member_cache(id, uname, xml_id);
    }

    if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
        if (xml_id != NULL) {
            node = find_cib_cluster_node(xml_id, uname);
        } else {
            // Assumes XML ID is node ID as string (as with Corosync)
            char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);

            node = find_cib_cluster_node(id_str, uname);
            free(id_str);
        }
    }

    return node;
}
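
/* Illustrative lookup (hypothetical caller): search the remote cache first and
 * fall back to the cluster member cache, without creating a new entry:
 *
 * \code
 * pcmk__node_status_t *found =
 *     pcmk__search_node_caches(0, "node1", NULL,
 *                              pcmk__node_search_remote
 *                              |pcmk__node_search_cluster_member);
 * \endcode
 */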

/*!
 * \internal
 * \brief Purge a node from cache (both cluster and Pacemaker Remote)
 *
 * \param[in] node_name  If not NULL, purge only nodes with this name
 * \param[in] node_id    If not 0, purge cluster nodes only if they have this ID
 *
 * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
 *       If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
 *       nodes that match \p node_name will be purged, and cluster nodes that
 *       match both \p node_name and \p node_id will be purged.
 * \note The caller must be careful not to use \p node_name after calling this
 *       function if it might be a pointer into a cache entry being removed.
 */
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
    char *node_name_copy = NULL;

    if ((node_name == NULL) && (node_id == 0U)) {
        return;
    }

    // Purge from Pacemaker Remote node cache
    if ((node_name != NULL)
        && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) {
        /* node_name could be a pointer into the cache entry being purged,
         * so reassign it to a copy before the original gets freed
         */
        node_name_copy = pcmk__str_copy(node_name);
        node_name = node_name_copy;

        crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
    }

    pcmk__cluster_forget_cluster_node(node_id, node_name);
    free(node_name_copy);
}

#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(pcmk__node_status_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    pcmk__node_status_t *existing_node = NULL;

    if ((node->cluster_layer_id == 0) || (node->name == NULL)) {
        return 0;
    }

    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if ((existing_node->cluster_layer_id > 0)
            && (existing_node->cluster_layer_id != node->cluster_layer_id)
            && pcmk__str_eq(existing_node->name, node->name, pcmk__str_casei)) {

            if (pcmk__cluster_is_node_active(existing_node)) {
                continue;
            }

            crm_warn("Removing cached offline node %" PRIu32 "/%s which has "
                     "conflicting name with %" PRIu32,
                     existing_node->cluster_layer_id, existing_node->name,
                     node->cluster_layer_id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif

/*!
 * \internal
 * \brief Get a cluster node cache entry, possibly creating one if not found
 *
 * If \c pcmk__node_search_cluster_member is set in \p flags, the return value
 * is guaranteed not to be \c NULL. A new cache entry is created if one does not
 * already exist.
 *
 * \param[in] id      If not 0, cluster node ID to search for
 * \param[in] uname   If not NULL, node name to search for
 * \param[in] xml_id  If not NULL while \p id is 0, search for this CIB XML ID
 *                    instead of a cluster ID
 * \param[in] flags   Group of enum pcmk__node_search_flags
 *
 * \return (Possibly newly created) cluster node cache entry
 */
/* coverity[-alloc] Memory is referenced in one or both hashtables */
pcmk__node_status_t *
pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
               uint32_t flags)
{
    pcmk__node_status_t *node = NULL;
    char *uname_lookup = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    // Check the Pacemaker Remote node cache first
    if (pcmk_is_set(flags, pcmk__node_search_remote)) {
        node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
        if (node != NULL) {
            return node;
        }
    }

    if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
        return NULL;
    }

    node = search_cluster_member_cache(id, uname, xml_id);

    /* If uname wasn't provided, and the search didn't turn up a name based on
     * the ID, we need to look up the node name using the ID in the cluster
     * membership.
     */
    if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) {
        uname_lookup = pcmk__cluster_node_name(id);
    }

    if (uname_lookup) {
        uname = uname_lookup;
        crm_trace("Inferred a name of '%s' for node %u", uname, id);

        /* Try to find the node one more time, now that we know its name */
        if (node == NULL) {
            node = search_cluster_member_cache(id, uname, xml_id);
        }
    }

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id,
                 1 + g_hash_table_size(pcmk__peer_cache));
        g_hash_table_replace(pcmk__peer_cache, uniqueid, node);
    }

    if ((id > 0) && (uname != NULL)
        && ((node->cluster_layer_id == 0) || (node->name == NULL))) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    if ((id > 0) && (node->cluster_layer_id == 0)) {
        node->cluster_layer_id = id;
    }

    if ((uname != NULL) && (node->name == NULL)) {
        update_peer_uname(node, uname);
    }

    if ((xml_id == NULL) && (node->xml_id == NULL)) {
        xml_id = pcmk__cluster_get_xml_id(node);
        if (xml_id == NULL) {
            crm_debug("Cannot obtain an XML ID for node %s[%u] at this time",
                      node->name, id);
        } else {
            crm_info("Node %s[%u] has XML ID %s", node->name, id, xml_id);
        }
    }

    free(uname_lookup);

    return node;
}
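
/* Illustrative usage (hypothetical caller; nodeid is assumed to come from a
 * membership event): look up a cluster node by its cluster layer ID, creating
 * a skeleton cache entry if none exists yet:
 *
 * \code
 * pcmk__node_status_t *peer =
 *     pcmk__get_node(nodeid, NULL, NULL, pcmk__node_search_cluster_member);
 *
 * // With pcmk__node_search_cluster_member set, peer is guaranteed non-NULL
 * crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE);
 * \endcode
 */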

/*!
 * \internal
 * \brief Update a node's uname
 *
 * \param[in,out] node   Node object to update
 * \param[in]     uname  New name to set
 *
 * \note This function should not be called within a peer cache iteration,
 *       because in some cases it can remove conflicting cache entries,
 *       which would invalidate the iterator.
 */
static void
update_peer_uname(pcmk__node_status_t *node, const char *uname)
{
    CRM_CHECK(uname != NULL,
              crm_err("Bug: can't update node name without name"); return);
    CRM_CHECK(node != NULL,
              crm_err("Bug: can't update node name to %s without node", uname);
              return);

    if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) {
        crm_debug("Node name '%s' did not change", uname);
        return;
    }

    for (const char *c = uname; *c; ++c) {
        if ((*c >= 'A') && (*c <= 'Z')) {
            crm_warn("Node names with capitals are discouraged, consider changing '%s'",
                     uname);
            break;
        }
    }

    pcmk__str_update(&node->name, uname);

    if (peer_status_callback != NULL) {
        peer_status_callback(pcmk__node_update_name, node, NULL);
    }

#if SUPPORT_COROSYNC
    if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
        && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {

        remove_conflicting_peer(node);
    }
#endif
}

/*!
 * \internal
 * \brief Get log-friendly string equivalent of a process flag
 *
 * \param[in] proc  Process flag
 *
 * \return Log-friendly string equivalent of \p proc
 */
static inline const char *
proc2text(enum crm_proc_flag proc)
{
    const char *text = "unknown";

    switch (proc) {
        case crm_proc_none:
            text = "none";
            break;
        case crm_proc_cpg:
            text = "corosync-cpg";
            break;
    }
    return text;
}

/*!
 * \internal
 * \brief Update a node's process information (and potentially state)
 *
 * \param[in]     source  Caller's function name (for log messages)
 * \param[in,out] node    Node object to update
 * \param[in]     flag    Bitmask of new process information
 * \param[in]     status  Node status (online, offline, etc.)
 *
 * \return NULL if any node was reaped from peer caches, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible, otherwise
 *       reaping could invalidate the iterator.
 */
pcmk__node_status_t *
crm_update_peer_proc(const char *source, pcmk__node_status_t *node,
                     uint32_t flag, const char *status)
{
    uint32_t last = 0;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                    source, proc2text(flag), status);
                            return NULL);

    /* Pacemaker doesn't spawn processes on remote nodes */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return node;
    }

    last = node->processes;
    if (status == NULL) {
        node->processes = flag;
        if (node->processes != last) {
            changed = TRUE;
        }

    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
        if ((node->processes & flag) != flag) {
            node->processes = pcmk__set_flags_as(__func__, __LINE__,
                                                 LOG_TRACE, "Peer process",
                                                 node->name, node->processes,
                                                 flag, "processes");
            changed = TRUE;
        }

    } else if (node->processes & flag) {
        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
                                               LOG_TRACE, "Peer process",
                                               node->name, node->processes,
                                               flag, "processes");
        changed = TRUE;
    }

    if (changed) {
        if ((status == NULL) && (flag <= crm_proc_none)) {
            crm_info("%s: Node %s[%" PRIu32 "] - all processes are now offline",
                     source, node->name, node->cluster_layer_id);
        } else {
            crm_info("%s: Node %s[%" PRIu32 "] - %s is now %s",
                     source, node->name, node->cluster_layer_id,
                     proc2text(flag), status);
        }

        if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
            node->when_online = time(NULL);

        } else {
            node->when_online = 0;
        }

        /* Call the client callback first, then update the peer state,
         * in case the node will be reaped
         */
        if (peer_status_callback != NULL) {
            peer_status_callback(pcmk__node_update_processes, node, &last);
        }

        /* The client callback shouldn't touch the peer caches,
         * but as a safety net, bail if the peer cache was destroyed.
         */
        if (pcmk__peer_cache == NULL) {
            return NULL;
        }

        if (autoreap) {
            const char *peer_state = NULL;

            if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
                peer_state = PCMK_VALUE_MEMBER;
            } else {
                peer_state = PCMK__VALUE_LOST;
            }
            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
        }
    } else {
        crm_trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)",
                  source, node->name, node->cluster_layer_id, proc2text(flag),
                  status);
    }
    return node;
}

/*!
 * \internal
 * \brief Update a cluster node cache entry's expected join state
 *
 * \param[in]     source    Caller's function name (for logging)
 * \param[in,out] node      Node to update
 * \param[in]     expected  Node's new join state
 */
void
pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
                           const char *expected)
{
    char *last = NULL;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL,
              crm_err("%s: Could not set 'expected' to %s", source, expected);
              return);

    /* Remote nodes don't participate in joins */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return;
    }

    last = node->expected;
    if ((expected != NULL)
        && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
        node->expected = strdup(expected);
        changed = TRUE;
    }

    if (changed) {
        crm_info("%s: Node %s[%" PRIu32 "] - expected state is now %s (was %s)",
                 source, node->name, node->cluster_layer_id, expected, last);
        free(last);
    } else {
        crm_trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)",
                  source, node->name, node->cluster_layer_id, expected);
    }
}

/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 * \param[in,out] iter        If not NULL, pointer to node's peer cache iterator
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function may be called from
 *       within a peer cache iteration if the iterator is supplied.
 */
static pcmk__node_status_t *
update_peer_state_iter(const char *source, pcmk__node_status_t *node,
                       const char *state, uint64_t membership,
                       GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s "
                      QB_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none);
    if (is_member) {
        node->when_lost = 0;
        if (membership) {
            node->membership_id = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        if (is_member) {
            node->when_member = time(NULL);

        } else {
            node->when_member = 0;
        }

        node->state = strdup(state);
        crm_notice("Node %s state is now %s " QB_XS
                   " nodeid=%" PRIu32 " previous=%s source=%s",
                   node->name, state, node->cluster_layer_id,
                   pcmk__s(last, "unknown"), source);
        if (peer_status_callback != NULL) {
            peer_status_callback(pcmk__node_update_state, node, last);
        }
        free(last);

        if (autoreap && !is_member
            && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * refresh_remote_nodes().
             */
            if (iter) {
                crm_notice("Purged 1 peer with cluster layer ID %" PRIu32
                           " and/or name=%s from the membership cache",
                           node->cluster_layer_id, node->name);
                g_hash_table_iter_remove(iter);

            } else {
                pcmk__cluster_forget_cluster_node(node->cluster_layer_id,
                                                  node->name);
            }
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " QB_XS
                  " nodeid=%" PRIu32 " source=%s",
                  node->name, state, node->cluster_layer_id, source);
    }
    return node;
}

/*!
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible,
 *       otherwise reaping could invalidate the iterator.
 */
pcmk__node_status_t *
pcmk__update_peer_state(const char *source, pcmk__node_status_t *node,
                        const char *state, uint64_t membership)
{
    return update_peer_state_iter(source, node, state, membership, NULL);
}

/*!
 * \internal
 * \brief Reap all nodes from cache whose membership information does not match
 *
 * \param[in] membership  Membership ID of nodes to keep
 */
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
        if (node->membership_id != membership) {
            if (node->state) {
                /* Calling update_peer_state_iter() allows us to remove the node
                 * from pcmk__peer_cache without invalidating our iterator
                 */
                update_peer_state_iter(__func__, node, PCMK__VALUE_LOST,
                                       membership, &iter);

            } else {
                crm_info("State of node %s[%" PRIu32 "] is still unknown",
                         node->name, node->cluster_layer_id);
            }
        }
    }
}

static pcmk__node_status_t *
find_cib_cluster_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    if (uname) {
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                crm_trace("Name match: %s = %p", node->name, node);
                by_name = node;
                break;
            }
        }
    }

    if (id) {
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
                             pcmk__str_none)) {
                crm_trace("ID match: %s = %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if ((by_id == NULL) && (by_name != NULL)) {
        crm_trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            node = NULL;

        } else {
            node = by_name;
        }

    } else if ((by_name == NULL) && (by_id != NULL)) {
        crm_trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            node = NULL;
        }

    } else if ((uname != NULL) && (by_id->name != NULL)
               && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB.
         * Return by_id. */

    } else if ((id != NULL) && (by_name->xml_id != NULL)
               && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) {
        /* Multiple nodes have the same id in the CIB.
         * Return by_name. */
        node = by_name;

    } else {
        node = NULL;
    }

    if (node == NULL) {
        crm_debug("Couldn't find node%s%s%s%s",
                  id? " " : "",
                  id? id : "",
                  uname? " with name " : "",
                  uname? uname : "");
    }

    return node;
}

static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
    const char *id = crm_element_value(xml_node, PCMK_XA_ID);
    const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
    pcmk__node_status_t *node = NULL;

    CRM_CHECK((id != NULL) && (uname != NULL), return);
    node = find_cib_cluster_node(id, uname);

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));

        node->name = pcmk__str_copy(uname);
        node->xml_id = pcmk__str_copy(id);

        g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);

    } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
        pcmk__str_update(&node->name, uname);

        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, pcmk__node_status_dirty);
    }
}

static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
    pcmk__cluster_init_node_caches();

    g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);

    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_MEMBER_NODE_CONFIG,
                               cluster_node_cib_cache_refresh_helper, NULL);

    // Remove all old cache entries that weren't seen in the CIB
    g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}

void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
    refresh_remote_nodes(cib);
    refresh_cluster_node_cib_cache(cib);
}

// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START

#include <crm/cluster/compat.h>

void
crm_peer_init(void)
{
    pcmk__cluster_init_node_caches();
}

// LCOV_EXCL_STOP
// End deprecated API
