root/lib/cluster/membership.c


DEFINITIONS

This source file includes the following definitions:
  1. pcmk__cluster_has_quorum
  2. pcmk__cluster_set_quorum
  3. pcmk__cluster_num_remote_nodes
  4. pcmk__cluster_lookup_remote_node
  5. pcmk__cluster_forget_remote_node
  6. remote_state_from_cib
  7. remote_cache_refresh_helper
  8. mark_dirty
  9. is_dirty
  10. refresh_remote_nodes
  11. pcmk__cluster_is_node_active
  12. should_forget_cluster_node
  13. pcmk__cluster_forget_cluster_node
  14. count_peer
  15. pcmk__cluster_num_active_nodes
  16. destroy_crm_node
  17. pcmk__cluster_init_node_caches
  18. pcmk__cluster_destroy_node_caches
  19. pcmk__cluster_set_status_callback
  20. pcmk__cluster_set_autoreap
  21. dump_peer_hash
  22. hash_find_by_data
  23. search_cluster_member_cache
  24. pcmk__search_node_caches
  25. pcmk__purge_node_from_cache
  26. remove_conflicting_peer
  27. pcmk__get_node
  28. update_peer_uname
  29. proc2text
  30. crm_update_peer_proc
  31. pcmk__update_peer_expected
  32. update_peer_state_iter
  33. pcmk__update_peer_state
  34. pcmk__reap_unseen_nodes
  35. find_cib_cluster_node
  36. cluster_node_cib_cache_refresh_helper
  37. refresh_cluster_node_cib_cache
  38. pcmk__refresh_node_caches_from_cib
  39. crm_peer_init

/*
 * Copyright 2004-2024 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <inttypes.h>                   // PRIu32
#include <stdbool.h>                    // bool
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <sys/types.h>
#include <unistd.h>

#include <glib.h>

#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"

/* The peer cache remembers cluster nodes that have been seen. This is managed
 * mostly automatically by libcrmcluster, based on cluster membership events.
 *
 * Because cluster nodes can have conflicting names or UUIDs, the hash table key
 * is a uniquely generated ID.
 *
 * @TODO Move caches to pcmk_cluster_t
 */
GHashTable *pcmk__peer_cache = NULL;

/* The remote peer cache tracks pacemaker_remote nodes. While the
 * value has the same type as the peer cache's, it is tracked separately for
 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
 * so the name (which is also the UUID) is used as the hash table key; there
 * is no equivalent of membership events, so management is not automatic; and
 * most users of the peer cache need to exclude pacemaker_remote nodes.
 *
 * @TODO That said, using a single cache would be more logical and less
 * error-prone, so it would be a good idea to merge them one day.
 *
 * libcrmcluster provides two avenues for populating the cache:
 * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
 * directly manage it, while refresh_remote_nodes() populates it via the CIB.
 *
 * @TODO Move caches to pcmk_cluster_t
 */
GHashTable *pcmk__remote_peer_cache = NULL;

/*
 * The CIB cluster node cache tracks cluster nodes that have been seen in
 * the CIB. It is useful mainly when a caller needs to know about a node that
 * may no longer be in the membership, but doesn't want to add the node to the
 * main peer cache tables.
 */
static GHashTable *cluster_node_cib_cache = NULL;

static bool autoreap = true;
static bool has_quorum = false;

// Flag setting and clearing for pcmk__node_status_t:flags

#define set_peer_flags(peer, flags_to_set) do {                               \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                           "Peer", (peer)->name,              \
                                           (peer)->flags, (flags_to_set),     \
                                           #flags_to_set);                    \
    } while (0)

#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
                                             LOG_TRACE,                       \
                                             "Peer", (peer)->name,            \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)

static void update_peer_uname(pcmk__node_status_t *node, const char *uname);
static pcmk__node_status_t *find_cib_cluster_node(const char *id,
                                                  const char *uname);

/*!
 * \internal
 * \brief Check whether the cluster currently has quorum
 *
 * \return \c true if the cluster has quorum, or \c false otherwise
 */
bool
pcmk__cluster_has_quorum(void)
{
    return has_quorum;
}

/*!
 * \internal
 * \brief Set whether the cluster currently has quorum
 *
 * \param[in] quorate  \c true if the cluster has quorum, or \c false otherwise
 */
void
pcmk__cluster_set_quorum(bool quorate)
{
    has_quorum = quorate;
}

/*!
 * \internal
 * \brief Get the number of Pacemaker Remote nodes that have been seen
 *
 * \return Number of cached Pacemaker Remote nodes
 */
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
    if (pcmk__remote_peer_cache == NULL) {
        return 0U;
    }
    return g_hash_table_size(pcmk__remote_peer_cache);
}

/*!
 * \internal
 * \brief Get a remote node cache entry, creating it if necessary
 *
 * \param[in] node_name  Name of remote node
 *
 * \return Cache entry for node on success, or \c NULL (and set \c errno)
 *         otherwise
 *
 * \note When creating a new entry, this will leave the node state undetermined.
 *       The caller should also call \c pcmk__update_peer_state() if the state
 *       is known.
 * \note Because this can add and remove cache entries, callers should not
 *       assume any previously obtained cache entry pointers remain valid.
 */
pcmk__node_status_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
    pcmk__node_status_t *node = NULL;
    char *node_name_copy = NULL;

    if (node_name == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* It's theoretically possible that the node was added to the cluster peer
     * cache before it was known to be a Pacemaker Remote node. Remove that
     * entry unless it has a node ID, which means the name actually is
     * associated with a cluster node. (@TODO return an error in that case?)
     */
    node = pcmk__search_node_caches(0, node_name,
                                    pcmk__node_search_cluster_member);
    if ((node != NULL) && (node->xml_id == NULL)) {
        /* node_name could be a pointer into the cache entry being removed, so
         * reassign it to a copy before the original gets freed
         */
        node_name_copy = strdup(node_name);
        if (node_name_copy == NULL) {
            errno = ENOMEM;
            return NULL;
        }
        node_name = node_name_copy;
        pcmk__cluster_forget_cluster_node(0, node_name);
    }

    /* Return existing cache entry if one exists */
    node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name);
    if (node) {
        free(node_name_copy);
        return node;
    }

    /* Allocate a new entry */
    node = calloc(1, sizeof(pcmk__node_status_t));
    if (node == NULL) {
        free(node_name_copy);
        return NULL;
    }

    /* Populate the essential information */
    set_peer_flags(node, pcmk__node_status_remote);
    node->xml_id = strdup(node_name);
    if (node->xml_id == NULL) {
        free(node);
        errno = ENOMEM;
        free(node_name_copy);
        return NULL;
    }

    /* Add the new entry to the cache */
    g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node);
    crm_trace("Added %s to remote cache", node_name);

    /* Update the entry's uname, ensuring peer status callbacks are called */
    update_peer_uname(node, node_name);
    free(node_name_copy);
    return node;
}
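
/* Illustrative sketch (not part of the original file): a typical caller of
 * pcmk__cluster_lookup_remote_node() checks both the return value and errno,
 * and sets the state separately once it is known. The node name "remote1" is
 * a hypothetical example.
 *
 *     pcmk__node_status_t *remote = pcmk__cluster_lookup_remote_node("remote1");
 *
 *     if (remote == NULL) {
 *         crm_err("Could not cache remote node: %s", strerror(errno));
 *     } else {
 *         // State is left undetermined on creation; set it if known
 *         pcmk__update_peer_state(__func__, remote, PCMK_VALUE_MEMBER, 0);
 *     }
 */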

/*!
 * \internal
 * \brief Remove a node from the Pacemaker Remote node cache
 *
 * \param[in] node_name  Name of node to remove from cache
 *
 * \note The caller must be careful not to use \p node_name after calling this
 *       function if it might be a pointer into the cache entry being removed.
 */
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
    /* Do a lookup first, because node_name could be a pointer within the entry
     * being removed -- we can't log it *after* removing it.
     */
    if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) {
        crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
    }
}

/*!
 * \internal
 * \brief Return node status based on a CIB status entry
 *
 * \param[in] node_state  XML of node state
 *
 * \return \c PCMK_VALUE_MEMBER if \c PCMK__XA_IN_CCM is true in
 *         \c PCMK__XE_NODE_STATE, or \c PCMK__VALUE_LOST otherwise
 */
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
    bool in_ccm = false;

    if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
                                &in_ccm) == pcmk_rc_ok) && in_ccm) {
        return PCMK_VALUE_MEMBER;
    }
    return PCMK__VALUE_LOST;
}

/* User data for looping through remote node xpath searches */
struct refresh_data {
    const char *field;  /* XML attribute to check for node name */
    gboolean has_state; /* Whether to update node state based on XML */
};

/*!
 * \internal
 * \brief Process one pacemaker_remote node xpath search result
 *
 * \param[in] result     XML search result
 * \param[in] user_data  What to look for in the XML
 */
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
    const struct refresh_data *data = user_data;
    const char *remote = crm_element_value(result, data->field);
    const char *state = NULL;
    pcmk__node_status_t *node;

    CRM_CHECK(remote != NULL, return);

    /* Determine node's state, if the result has it */
    if (data->has_state) {
        state = remote_state_from_cib(result);
    }

    /* Check whether cache already has entry for node */
    node = g_hash_table_lookup(pcmk__remote_peer_cache, remote);

    if (node == NULL) {
        /* Node is not in cache, so add a new entry for it */
        node = pcmk__cluster_lookup_remote_node(remote);
        pcmk__assert(node != NULL);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }

    } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, pcmk__node_status_dirty);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }
    }
}

static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
    set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty);
}

static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
    const pcmk__node_status_t *node = value;

    return pcmk_is_set(node->flags, pcmk__node_status_dirty);
}

/*!
 * \internal
 * \brief Repopulate the remote node cache based on CIB XML
 *
 * \param[in] cib  CIB XML to parse
 */
static void
refresh_remote_nodes(xmlNode *cib)
{
    struct refresh_data data;

    pcmk__cluster_init_node_caches();

    /* First, we mark all existing cache entries as dirty,
     * so that later we can remove any that weren't in the CIB.
     * We don't empty the cache, because we need to detect changes in state.
     */
    g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);

    /* Look for guest nodes and remote nodes in the status section */
    data.field = PCMK_XA_ID;
    data.has_state = TRUE;
    crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
                             remote_cache_refresh_helper, &data);

    /* Look for guest nodes and remote nodes in the configuration section,
     * because they may have just been added and not have a status entry yet.
     * In that case, the cached node state will be left NULL, so that the
     * peer status callback isn't called until we're sure the node started
     * successfully.
     */
    data.field = PCMK_XA_VALUE;
    data.has_state = FALSE;
    crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
                             remote_cache_refresh_helper, &data);
    data.field = PCMK_XA_ID;
    data.has_state = FALSE;
    crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
                             remote_cache_refresh_helper, &data);

    /* Remove all old cache entries that weren't seen in the CIB */
    g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
}

/*!
 * \internal
 * \brief Check whether a node is an active cluster node
 *
 * Remote nodes are never considered active. This guarantees that they can never
 * become DC.
 *
 * \param[in] node  Node to check
 *
 * \return \c true if the node is an active cluster node, or \c false otherwise
 */
bool
pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
{
    const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();

    if ((node == NULL) || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return false;
    }

    switch (cluster_layer) {
        case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
            return pcmk__corosync_is_peer_active(node);
#else
            break;
#endif  // SUPPORT_COROSYNC
        default:
            break;
    }

    crm_err("Unhandled cluster layer: %s",
            pcmk_cluster_layer_text(cluster_layer));
    return false;
}

/*!
 * \internal
 * \brief Check if a node's entry should be removed from the cluster node cache
 *
 * A node should be removed from the cache if it's inactive and matches another
 * \c pcmk__node_status_t (the search object). The node is considered a
 * mismatch if any of the following are true:
 * * The search object is \c NULL.
 * * The search object has an ID set and the cached node's ID does not match it.
 * * The search object does not have an ID set, and the cached node's name does
 *   not match the search node's name. (If both names are \c NULL, it's a
 *   match.)
 *
 * Otherwise, the node is considered a match.
 *
 * Note that if the search object has both an ID and a name set, the name is
 * ignored for matching purposes.
 *
 * \param[in] key        Ignored
 * \param[in] value      \c pcmk__node_status_t object from cluster node cache
 * \param[in] user_data  \c pcmk__node_status_t object to match against (search
 *                       object)
 *
 * \return \c TRUE if the node entry should be removed from \c pcmk__peer_cache,
 *         or \c FALSE otherwise
 */
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__node_status_t *node = value;
    pcmk__node_status_t *search = user_data;

    if (search == NULL) {
        return FALSE;
    }
    if ((search->cluster_layer_id != 0)
        && (node->cluster_layer_id != search->cluster_layer_id)) {
        return FALSE;
    }
    if ((search->cluster_layer_id == 0)
        && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) {
        // @TODO Consider name even if ID is set?
        return FALSE;
    }
    if (pcmk__cluster_is_node_active(value)) {
        return FALSE;
    }

    crm_info("Removing node with name %s and cluster layer ID %" PRIu32
             " from membership cache",
             pcmk__s(node->name, "(unknown)"), node->cluster_layer_id);
    return TRUE;
}

/*!
 * \internal
 * \brief Remove one or more inactive nodes from the cluster node cache
 *
 * All inactive nodes matching \p id and \p node_name as described in
 * \c should_forget_cluster_node documentation are removed from the cache.
 *
 * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
 * from the cache regardless of ID and name. This differs from clearing the
 * cache, in that entries for active nodes are preserved.
 *
 * \param[in] id         ID of node to remove from cache (0 to ignore)
 * \param[in] node_name  Name of node to remove from cache (ignored if \p id is
 *                       nonzero)
 *
 * \note \p node_name is not modified directly, but it will be freed if it's a
 *       pointer into a cache entry that is removed.
 */
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
    pcmk__node_status_t search = { 0, };
    char *criterion = NULL; // For logging
    guint matches = 0;

    if (pcmk__peer_cache == NULL) {
        crm_trace("Membership cache not initialized, ignoring removal request");
        return;
    }

    search.cluster_layer_id = id;
    search.name = pcmk__str_copy(node_name);    // May log after original freed

    if (id > 0) {
        criterion = crm_strdup_printf("cluster layer ID %" PRIu32, id);

    } else if (node_name != NULL) {
        criterion = crm_strdup_printf("name %s", node_name);
    }

    matches = g_hash_table_foreach_remove(pcmk__peer_cache,
                                          should_forget_cluster_node, &search);
    if (matches > 0) {
        if (criterion != NULL) {
            crm_notice("Removed %u inactive node%s with %s from the membership "
                       "cache",
                       matches, pcmk__plural_s(matches), criterion);
        } else {
            crm_notice("Removed all (%u) inactive cluster nodes from the "
                       "membership cache",
                       matches);
        }

    } else {
        crm_info("No inactive cluster nodes%s%s to remove from the membership "
                 "cache",
                 ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
    }

    free(search.name);
    free(criterion);
}
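
/* Illustrative sketch (not part of the original file): because an ID of 0 and
 * a NULL name match every entry, a caller can drop all inactive cluster nodes
 * while keeping active ones -- unlike destroying the cache outright:
 *
 *     // Forget every cached cluster node that is no longer active
 *     pcmk__cluster_forget_cluster_node(0, NULL);
 *
 *     // Forget one node by cluster layer ID (name is ignored when id != 0)
 *     pcmk__cluster_forget_cluster_node(3, NULL);
 */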

static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
    unsigned int *count = user_data;
    pcmk__node_status_t *node = value;

    if (pcmk__cluster_is_node_active(node)) {
        *count = *count + 1;
    }
}

/*!
 * \internal
 * \brief Get the number of active cluster nodes that have been seen
 *
 * Remote nodes are never considered active. This guarantees that they can never
 * become DC.
 *
 * \return Number of active nodes in the cluster node cache
 */
unsigned int
pcmk__cluster_num_active_nodes(void)
{
    unsigned int count = 0;

    if (pcmk__peer_cache != NULL) {
        g_hash_table_foreach(pcmk__peer_cache, count_peer, &count);
    }
    return count;
}

static void
destroy_crm_node(gpointer data)
{
    pcmk__node_status_t *node = data;

    crm_trace("Destroying entry for node %" PRIu32 ": %s",
              node->cluster_layer_id, node->name);

    free(node->name);
    free(node->state);
    free(node->xml_id);
    free(node->user_data);
    free(node->expected);
    free(node->conn_host);
    free(node);
}

/*!
 * \internal
 * \brief Initialize node caches
 */
void
pcmk__cluster_init_node_caches(void)
{
    if (pcmk__peer_cache == NULL) {
        pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node);
    }

    if (pcmk__remote_peer_cache == NULL) {
        pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
    }

    if (cluster_node_cib_cache == NULL) {
        cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
    }
}

/*!
 * \internal
 * \brief Destroy node caches
 */
void
pcmk__cluster_destroy_node_caches(void)
{
    if (pcmk__peer_cache != NULL) {
        crm_trace("Destroying peer cache with %d members",
                  g_hash_table_size(pcmk__peer_cache));
        g_hash_table_destroy(pcmk__peer_cache);
        pcmk__peer_cache = NULL;
    }

    if (pcmk__remote_peer_cache != NULL) {
        crm_trace("Destroying remote peer cache with %d members",
                  pcmk__cluster_num_remote_nodes());
        g_hash_table_destroy(pcmk__remote_peer_cache);
        pcmk__remote_peer_cache = NULL;
    }

    if (cluster_node_cib_cache != NULL) {
        crm_trace("Destroying configured cluster node cache with %d members",
                  g_hash_table_size(cluster_node_cib_cache));
        g_hash_table_destroy(cluster_node_cib_cache);
        cluster_node_cib_cache = NULL;
    }
}

static void (*peer_status_callback)(enum pcmk__node_update,
                                    pcmk__node_status_t *,
                                    const void *) = NULL;

/*!
 * \internal
 * \brief Set a client function that will be called after peer status changes
 *
 * \param[in] dispatch  Pointer to function to use as callback
 *
 * \note Client callbacks should do only client-specific handling. Callbacks
 *       must not add or remove entries in the peer caches.
 */
void
pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
                                                   pcmk__node_status_t *,
                                                   const void *))
{
    // @TODO Improve documentation of peer_status_callback
    peer_status_callback = dispatch;
}
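
/* Illustrative sketch (not part of the original file): a daemon can register
 * a status callback like this. Note the constraint above: the callback must
 * not touch the peer caches itself.
 *
 *     static void
 *     my_peer_update_cb(enum pcmk__node_update type,
 *                       pcmk__node_status_t *node, const void *data)
 *     {
 *         // Client-specific handling only; never modify the caches here
 *         crm_debug("Peer %s changed (update type %d)",
 *                   pcmk__s(node->name, "(unknown)"), type);
 *     }
 *
 *     pcmk__cluster_set_status_callback(&my_peer_update_cb);
 */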

/*!
 * \internal
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * If \c true (the default), calling \c crm_update_peer_proc() will also update
 * the peer state to \c PCMK_VALUE_MEMBER or \c PCMK__VALUE_LOST, and updating
 * the peer state will reap peers whose state changes to anything other than
 * \c PCMK_VALUE_MEMBER.
 *
 * Callers should leave this enabled unless they plan to manage the cache
 * separately on their own.
 *
 * \param[in] enable  \c true to enable automatic reaping, \c false to disable
 */
void
pcmk__cluster_set_autoreap(bool enable)
{
    autoreap = enable;
}
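
/* Illustrative sketch (not part of the original file): a caller that manages
 * the membership cache on its own disables automatic reaping up front, so
 * that pcmk__update_peer_state() never frees entries behind its back:
 *
 *     pcmk__cluster_set_autoreap(false);
 */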

static void
dump_peer_hash(int level, const char *caller)
{
    GHashTableIter iter;
    const char *id = NULL;
    pcmk__node_status_t *node = NULL;

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
        do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s",
                   caller, node->cluster_layer_id, node->name, node, id);
    }
}

static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
    return value == user_data;
}

/*!
 * \internal
 * \brief Search cluster member node cache
 *
 * \param[in] id     If not 0, cluster node ID to search for
 * \param[in] uname  If not NULL, node name to search for
 * \param[in] uuid   If not NULL while id is 0, node UUID instead of cluster
 *                   node ID to search for
 *
 * \return Cluster node cache entry if found, otherwise NULL
 */
static pcmk__node_status_t *
search_cluster_member_cache(unsigned int id, const char *uname,
                            const char *uuid)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    if (uname != NULL) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                crm_trace("Name match: %s", node->name);
                by_name = node;
                break;
            }
        }
    }

    if (id > 0) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->cluster_layer_id == id) {
                crm_trace("ID match: %" PRIu32, node->cluster_layer_id);
                by_id = node;
                break;
            }
        }

    } else if (uuid != NULL) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) {
                crm_trace("UUID match: %s", node->xml_id);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if ((by_id == NULL) && (by_name != NULL)) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if (id && by_name->cluster_layer_id) {
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Nodes %u and %" PRIu32 " share the same name '%s'",
                     id, by_name->cluster_layer_id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if ((by_name == NULL) && (by_id != NULL)) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if ((uname != NULL) && (by_id->name != NULL)) {
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Nodes '%s' and '%s' share the same cluster nodeid %u: "
                     "assuming '%s' is correct",
                     uname, by_id->name, id, uname);
        }

    } else if ((uname != NULL) && (by_id->name != NULL)) {
        if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
            crm_notice("Node '%s' has changed its cluster layer ID "
                       "from %" PRIu32 " to %" PRIu32,
                       by_id->name, by_name->cluster_layer_id,
                       by_id->cluster_layer_id);
            g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
                                        by_name);

        } else {
            crm_warn("Nodes '%s' and '%s' share the same cluster nodeid: %u %s",
                     by_id->name, by_name->name, id, uname);
            dump_peer_hash(LOG_INFO, __func__);
            crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
                      TRUE);
        }

    } else if ((id > 0) && (by_name->cluster_layer_id > 0)) {
        crm_warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: '%s'",
                 by_id->cluster_layer_id, by_name->cluster_layer_id, uname);

    } else {
        /* Simple merge */

        /* Only corosync-based clusters use node IDs. The functions that call
         * pcmk__update_peer_state() and crm_update_peer_proc() only know
         * nodeid, so 'by_id' is authoritative when merging.
         */
        dump_peer_hash(LOG_DEBUG, __func__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
                                    by_name);
    }

    return node;
}

/*!
 * \internal
 * \brief Search caches for a node (cluster or Pacemaker Remote)
 *
 * \param[in] id     If not 0, cluster node ID to search for
 * \param[in] uname  If not NULL, node name to search for
 * \param[in] flags  Group of enum pcmk__node_search_flags
 *
 * \return Node cache entry if found, otherwise NULL
 */
pcmk__node_status_t *
pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
{
    pcmk__node_status_t *node = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
        node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
    }

    if ((node == NULL)
        && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {

        node = search_cluster_member_cache(id, uname, NULL);
    }

    if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
        char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);

        node = find_cib_cluster_node(id_str, uname);
        free(id_str);
    }

    return node;
}
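
/* Illustrative sketch (not part of the original file): the search flags can
 * be combined, and the caches are consulted in order -- remote, then cluster
 * member, then CIB. The node name "node1" is a hypothetical example.
 *
 *     pcmk__node_status_t *found =
 *         pcmk__search_node_caches(0, "node1",
 *                                  pcmk__node_search_remote
 *                                  |pcmk__node_search_cluster_member);
 *
 *     if (found == NULL) {
 *         crm_trace("node1 is not in the remote or cluster member caches");
 *     }
 */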

/*!
 * \internal
 * \brief Purge a node from cache (both cluster and Pacemaker Remote)
 *
 * \param[in] node_name  If not NULL, purge only nodes with this name
 * \param[in] node_id    If not 0, purge cluster nodes only if they have this ID
 *
 * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
 *       If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
 *       nodes that match \p node_name will be purged, and cluster nodes that
 *       match both \p node_name and \p node_id will be purged.
 * \note The caller must be careful not to use \p node_name after calling this
 *       function if it might be a pointer into a cache entry being removed.
 */
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
    char *node_name_copy = NULL;

    if ((node_name == NULL) && (node_id == 0U)) {
        return;
    }

    // Purge from Pacemaker Remote node cache
    if ((node_name != NULL)
        && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) {
        /* node_name could be a pointer into the cache entry being purged,
         * so reassign it to a copy before the original gets freed
         */
        node_name_copy = pcmk__str_copy(node_name);
        node_name = node_name_copy;

        crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
    }

    pcmk__cluster_forget_cluster_node(node_id, node_name);
    free(node_name_copy);
}

#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(pcmk__node_status_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    pcmk__node_status_t *existing_node = NULL;

    if ((node->cluster_layer_id == 0) || (node->name == NULL)) {
        return 0;
    }

    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if ((existing_node->cluster_layer_id > 0)
            && (existing_node->cluster_layer_id != node->cluster_layer_id)
            && pcmk__str_eq(existing_node->name, node->name, pcmk__str_casei)) {

            if (pcmk__cluster_is_node_active(existing_node)) {
                continue;
            }

            crm_warn("Removing cached offline node %" PRIu32 "/%s which has "
                     "conflicting name with %" PRIu32,
                     existing_node->cluster_layer_id, existing_node->name,
                     node->cluster_layer_id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif

/*!
 * \internal
 * \brief Get a cluster node cache entry, possibly creating one if not found
 *
 * If \c pcmk__node_search_cluster_member is set in \p flags, the return value
 * is guaranteed not to be \c NULL. A new cache entry is created if one does not
 * already exist.
 *
 * \param[in] id      If not 0, cluster node ID to search for
 * \param[in] uname   If not NULL, node name to search for
 * \param[in] xml_id  If not NULL while \p id is 0, search for this CIB XML ID
 *                    instead of a cluster ID
 * \param[in] flags   Group of enum pcmk__node_search_flags
 *
 * \return (Possibly newly created) cluster node cache entry
 */
/* coverity[-alloc] Memory is referenced in one or both hashtables */
pcmk__node_status_t *
pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
               uint32_t flags)
{
    pcmk__node_status_t *node = NULL;
    char *uname_lookup = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    // Check the Pacemaker Remote node cache first
    if (pcmk_is_set(flags, pcmk__node_search_remote)) {
        node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
        if (node != NULL) {
            return node;
        }
    }

    if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
        return NULL;
    }

    node = search_cluster_member_cache(id, uname, xml_id);

    /* If uname wasn't provided, and the cache search didn't turn up a name
     * based on id, we need to look up the node name using the id in the
     * cluster membership.
     */
    if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) {
        uname_lookup = pcmk__cluster_node_name(id);
    }

    if (uname_lookup) {
        uname = uname_lookup;
        crm_trace("Inferred a name of '%s' for node %u", uname, id);

        /* Try to find the node again, now that we know its name */
        if (node == NULL) {
            node = search_cluster_member_cache(id, uname, xml_id);
        }
    }

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id,
                 1 + g_hash_table_size(pcmk__peer_cache));
        g_hash_table_replace(pcmk__peer_cache, uniqueid, node);
    }

    if ((id > 0) && (uname != NULL)
        && ((node->cluster_layer_id == 0) || (node->name == NULL))) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    if ((id > 0) && (node->cluster_layer_id == 0)) {
        node->cluster_layer_id = id;
    }

    if ((uname != NULL) && (node->name == NULL)) {
        update_peer_uname(node, uname);
    }

    if ((xml_id == NULL) && (node->xml_id == NULL)) {
        xml_id = pcmk__cluster_node_uuid(node);
        if (xml_id == NULL) {
            crm_debug("Cannot obtain an XML ID for node %s[%u] at this time",
                      node->name, id);
        } else {
            crm_info("Node %s[%u] has XML ID %s", node->name, id, xml_id);
        }
    }

    free(uname_lookup);

    return node;
}
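
/* Illustrative sketch (not part of the original file): membership event
 * handlers typically use pcmk__get_node() so that a cache entry is created
 * on first sight of a peer. The ID 1 and name "node1" are hypothetical
 * examples.
 *
 *     pcmk__node_status_t *peer =
 *         pcmk__get_node(1, "node1", NULL, pcmk__node_search_cluster_member);
 *
 *     // Guaranteed non-NULL because pcmk__node_search_cluster_member is set
 *     crm_debug("Cached peer %s has cluster layer ID %" PRIu32,
 *               peer->name, peer->cluster_layer_id);
 */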

/*!
 * \internal
 * \brief Update a node's uname
 *
 * \param[in,out] node   Node object to update
 * \param[in]     uname  New name to set
 *
 * \note This function should not be called within a peer cache iteration,
 *       because in some cases it can remove conflicting cache entries,
 *       which would invalidate the iterator.
 */
static void
update_peer_uname(pcmk__node_status_t *node, const char *uname)
{
    CRM_CHECK(uname != NULL,
              crm_err("Bug: can't update node name without name"); return);
    CRM_CHECK(node != NULL,
              crm_err("Bug: can't update node name to %s without node", uname);
              return);

    if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) {
        crm_debug("Node name '%s' did not change", uname);
        return;
    }

    for (const char *c = uname; *c; ++c) {
        if ((*c >= 'A') && (*c <= 'Z')) {
            crm_warn("Node names with capitals are discouraged, "
                     "consider changing '%s'", uname);
            break;
        }
    }

    pcmk__str_update(&node->name, uname);

    if (peer_status_callback != NULL) {
        peer_status_callback(pcmk__node_update_name, node, NULL);
    }

#if SUPPORT_COROSYNC
    if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
        && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {

        remove_conflicting_peer(node);
    }
#endif
}

/*!
 * \internal
 * \brief Get log-friendly string equivalent of a process flag
 *
 * \param[in] proc  Process flag
 *
 * \return Log-friendly string equivalent of \p proc
 */
static inline const char *
proc2text(enum crm_proc_flag proc)
{
    const char *text = "unknown";

    switch (proc) {
        case crm_proc_none:
            text = "none";
            break;
        case crm_proc_cpg:
            text = "corosync-cpg";
            break;
    }
    return text;
}

/*!
 * \internal
 * \brief Update a node's process information (and potentially state)
 *
 * \param[in]     source  Caller's function name (for log messages)
 * \param[in,out] node    Node object to update
 * \param[in]     flag    Bitmask of new process information
 * \param[in]     status  Node status (online, offline, etc.)
 *
 * \return NULL if any node was reaped from peer caches, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible, otherwise
 *       reaping could invalidate the iterator.
 */
pcmk__node_status_t *
crm_update_peer_proc(const char *source, pcmk__node_status_t *node,
                     uint32_t flag, const char *status)
{
    uint32_t last = 0;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                    source, proc2text(flag), status);
                            return NULL);

    /* Pacemaker doesn't spawn processes on remote nodes */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return node;
    }

    last = node->processes;
    if (status == NULL) {
        node->processes = flag;
        if (node->processes != last) {
            changed = TRUE;
        }

    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
        if ((node->processes & flag) != flag) {
            node->processes = pcmk__set_flags_as(__func__, __LINE__,
                                                 LOG_TRACE, "Peer process",
                                                 node->name, node->processes,
                                                 flag, "processes");
            changed = TRUE;
        }

    } else if (node->processes & flag) {
        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
                                               LOG_TRACE, "Peer process",
                                               node->name, node->processes,
                                               flag, "processes");
        changed = TRUE;
    }

    if (changed) {
        if ((status == NULL) && (flag <= crm_proc_none)) {
            crm_info("%s: Node %s[%" PRIu32 "] - all processes are now offline",
                     source, node->name, node->cluster_layer_id);
        } else {
            crm_info("%s: Node %s[%" PRIu32 "] - %s is now %s",
                     source, node->name, node->cluster_layer_id,
                     proc2text(flag), status);
        }

        if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
            node->when_online = time(NULL);

        } else {
            node->when_online = 0;
        }

        /* Call the client callback first, then update the peer state,
         * in case the node will be reaped
         */
        if (peer_status_callback != NULL) {
            peer_status_callback(pcmk__node_update_processes, node, &last);
        }

        /* The client callback shouldn't touch the peer caches,
         * but as a safety net, bail if the peer cache was destroyed.
         */
        if (pcmk__peer_cache == NULL) {
            return NULL;
        }

        if (autoreap) {
            const char *peer_state = NULL;

            if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
                peer_state = PCMK_VALUE_MEMBER;
            } else {
                peer_state = PCMK__VALUE_LOST;
            }
            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
        }
    } else {
        crm_trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)",
                  source, node->name, node->cluster_layer_id, proc2text(flag),
                  status);
    }
    return node;
}
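
/* Illustrative sketch (not part of the original file): because autoreap may
 * free the node when its state changes to lost, callers must treat a NULL
 * return as "entry gone" and drop any pointers to it. Any status other than
 * PCMK_VALUE_ONLINE clears the process flag; the "offline" string here is a
 * hypothetical example.
 *
 *     peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg, "offline");
 *     if (peer == NULL) {
 *         // The cache entry was reaped; do not dereference the old pointer
 *         return;
 *     }
 */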

/*!
 * \internal
 * \brief Update a cluster node cache entry's expected join state
 *
 * \param[in]     source    Caller's function name (for logging)
 * \param[in,out] node      Node to update
 * \param[in]     expected  Node's new join state
 */
void
pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
                           const char *expected)
{
    char *last = NULL;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL,
              crm_err("%s: Could not set 'expected' to %s", source, expected);
              return);

    /* Remote nodes don't participate in joins */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return;
    }

    last = node->expected;
    if ((expected != NULL)
        && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
        node->expected = strdup(expected);
        changed = TRUE;
    }

    if (changed) {
        crm_info("%s: Node %s[%" PRIu32 "] - expected state is now %s (was %s)",
                 source, node->name, node->cluster_layer_id, expected, last);
        free(last);
    } else {
        crm_trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)",
                  source, node->name, node->cluster_layer_id, expected);
    }
}

/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 * \param[in,out] iter        If not NULL, pointer to node's peer cache iterator
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function may be called from
 *       within a peer cache iteration if the iterator is supplied.
 */
static pcmk__node_status_t *
update_peer_state_iter(const char *source, pcmk__node_status_t *node,
                       const char *state, uint64_t membership,
                       GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s "
                      QB_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none);
    if (is_member) {
        node->when_lost = 0;
        if (membership) {
            node->membership_id = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        if (is_member) {
            node->when_member = time(NULL);

        } else {
            node->when_member = 0;
        }

        node->state = strdup(state);
        crm_notice("Node %s state is now %s " QB_XS
                   " nodeid=%" PRIu32 " previous=%s source=%s",
                   node->name, state, node->cluster_layer_id,
                   pcmk__s(last, "unknown"), source);
        if (peer_status_callback != NULL) {
            peer_status_callback(pcmk__node_update_state, node, last);
        }
        free(last);

        if (autoreap && !is_member
            && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * refresh_remote_nodes().
             */
            if (iter != NULL) {
                crm_notice("Purged 1 peer with cluster layer ID %" PRIu32
                           " and/or name=%s from the membership cache",
                           node->cluster_layer_id, node->name);
                g_hash_table_iter_remove(iter);

            } else {
                pcmk__cluster_forget_cluster_node(node->cluster_layer_id,
                                                  node->name);
            }
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " QB_XS
                  " nodeid=%" PRIu32 " source=%s",
                  node->name, state, node->cluster_layer_id, source);
    }
    return node;
}

/*!
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible,
 *       otherwise reaping could invalidate the iterator.
 */
pcmk__node_status_t *
pcmk__update_peer_state(const char *source, pcmk__node_status_t *node,
                        const char *state, uint64_t membership)
{
    return update_peer_state_iter(source, node, state, membership, NULL);
}

/*!
 * \internal
 * \brief Reap all nodes from cache whose membership information does not match
 *
 * \param[in] membership  Membership ID of nodes to keep
 */
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
        if (node->membership_id != membership) {
            if (node->state) {
                /* Calling update_peer_state_iter() allows us to remove the node
                 * from pcmk__peer_cache without invalidating our iterator
                 */
                update_peer_state_iter(__func__, node, PCMK__VALUE_LOST,
                                       membership, &iter);

            } else {
                crm_info("State of node %s[%" PRIu32 "] is still unknown",
                         node->name, node->cluster_layer_id);
            }
        }
    }
}
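
/* Illustrative sketch (not part of the original file): a membership event
 * handler typically records each peer reported in the event as a member with
 * the event's membership (ring) ID, then reaps whatever was not seen. The
 * ring_id and peer variables are hypothetical.
 *
 *     peer = pcmk__update_peer_state(__func__, peer, PCMK_VALUE_MEMBER,
 *                                    ring_id);
 *     // ... repeat for each peer in the event, then:
 *     pcmk__reap_unseen_nodes(ring_id);
 */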

static pcmk__node_status_t *
find_cib_cluster_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    if (uname) {
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                crm_trace("Name match: %s = %p", node->name, node);
                by_name = node;
                break;
            }
        }
    }

    if (id) {
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->xml_id, id, pcmk__str_casei)) {
                crm_trace("ID match: %s = %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if ((by_id == NULL) && (by_name != NULL)) {
        crm_trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            node = NULL;

        } else {
            node = by_name;
        }

    } else if ((by_name == NULL) && (by_id != NULL)) {
        crm_trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            node = NULL;
        }

    } else if ((uname != NULL) && (by_id->name != NULL)
               && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB.
         * Return by_id. */

    } else if ((id != NULL) && (by_name->xml_id != NULL)
               && pcmk__str_eq(id, by_name->xml_id, pcmk__str_casei)) {
        /* Multiple nodes have the same id in the CIB.
         * Return by_name. */
        node = by_name;

    } else {
        node = NULL;
    }

    if (node == NULL) {
        crm_debug("Couldn't find node%s%s%s%s",
                  id? " " : "",
                  id? id : "",
                  uname? " with name " : "",
                  uname? uname : "");
    }

    return node;
}

static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
    const char *id = crm_element_value(xml_node, PCMK_XA_ID);
    const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
    pcmk__node_status_t *node = NULL;

    CRM_CHECK((id != NULL) && (uname != NULL), return);
    node = find_cib_cluster_node(id, uname);

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));

        node->name = pcmk__str_copy(uname);
        node->xml_id = pcmk__str_copy(id);

        g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);

    } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
        pcmk__str_update(&node->name, uname);

        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, pcmk__node_status_dirty);
    }
}

static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
    pcmk__cluster_init_node_caches();

    g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);

    crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
                             cluster_node_cib_cache_refresh_helper, NULL);

    // Remove all old cache entries that weren't seen in the CIB
    g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}

void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
    refresh_remote_nodes(cib);
    refresh_cluster_node_cib_cache(cib);
}
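
/* Illustrative sketch (not part of the original file): a daemon that has just
 * obtained a copy of the CIB (for example, from a CIB query; the cib_xml
 * variable here is hypothetical) can bring both the remote node cache and the
 * CIB cluster node cache up to date with a single call:
 *
 *     pcmk__refresh_node_caches_from_cib(cib_xml);
 */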

// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START

#include <crm/cluster/compat.h>

void
crm_peer_init(void)
{
    pcmk__cluster_init_node_caches();
}

// LCOV_EXCL_STOP
// End deprecated API
