root/lib/cluster/membership.c


DEFINITIONS

This source file includes the following definitions.
  1. pcmk__cluster_num_remote_nodes
  2. pcmk__cluster_lookup_remote_node
  3. pcmk__cluster_forget_remote_node
  4. remote_state_from_cib
  5. remote_cache_refresh_helper
  6. mark_dirty
  7. is_dirty
  8. refresh_remote_nodes
  9. pcmk__cluster_is_node_active
  10. should_forget_cluster_node
  11. pcmk__cluster_forget_cluster_node
  12. count_peer
  13. pcmk__cluster_num_active_nodes
  14. destroy_crm_node
  15. pcmk__cluster_init_node_caches
  16. pcmk__cluster_destroy_node_caches
  17. pcmk__cluster_set_status_callback
  18. pcmk__cluster_set_autoreap
  19. dump_peer_hash
  20. hash_find_by_data
  21. search_cluster_member_cache
  22. pcmk__search_node_caches
  23. pcmk__purge_node_from_cache
  24. remove_conflicting_peer
  25. pcmk__get_node
  26. update_peer_uname
  27. proc2text
  28. crm_update_peer_proc
  29. pcmk__update_peer_expected
  30. update_peer_state_iter
  31. pcmk__update_peer_state
  32. pcmk__reap_unseen_nodes
  33. find_cib_cluster_node
  34. cluster_node_cib_cache_refresh_helper
  35. refresh_cluster_node_cib_cache
  36. pcmk__refresh_node_caches_from_cib
  37. crm_terminate_member
  38. crm_terminate_member_no_mainloop
  39. crm_get_peer
  40. crm_get_peer_full
  41. crm_remote_peer_cache_size
  42. crm_remote_peer_cache_refresh
  43. crm_remote_peer_get
  44. crm_remote_peer_cache_remove
  45. crm_is_peer_active
  46. crm_active_peers
  47. reap_crm_member
  48. crm_peer_init
  49. crm_peer_destroy
  50. crm_set_autoreap
  51. crm_set_status_callback

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #ifndef _GNU_SOURCE
  13 #  define _GNU_SOURCE
  14 #endif
  15 
  16 #include <inttypes.h>                   // PRIu32
  17 #include <sys/param.h>
  18 #include <sys/types.h>
  19 #include <stdio.h>
  20 #include <unistd.h>
  21 #include <string.h>
  22 #include <glib.h>
  23 #include <crm/common/ipc.h>
  24 #include <crm/common/xml_internal.h>
  25 #include <crm/cluster/internal.h>
  26 #include <crm/common/xml.h>
  27 #include <crm/stonith-ng.h>
  28 #include "crmcluster_private.h"
  29 
  30 /* The peer cache remembers cluster nodes that have been seen.
  31  * This is managed mostly automatically by libcluster, based on
  32  * cluster membership events.
  33  *
  34  * Because cluster nodes can have conflicting names or UUIDs,
  35  * the hash table key is a uniquely generated ID.
  36  *
  37  * @COMPAT When this is internal, rename to cluster_node_member_cache and make
  38  * static.
  39  */
  40 GHashTable *crm_peer_cache = NULL;
  41 
  42 /*
  43  * The remote peer cache tracks pacemaker_remote nodes. While the
  44  * value has the same type as the peer cache's, it is tracked separately for
  45  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
  46  * so the name (which is also the UUID) is used as the hash table key; there
  47  * is no equivalent of membership events, so management is not automatic; and
  48  * most users of the peer cache need to exclude pacemaker_remote nodes.
  49  *
  50  * That said, using a single cache would be more logical and less error-prone,
  51  * so it would be a good idea to merge them one day.
  52  *
  53  * libcluster provides two avenues for populating the cache:
  54  * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
  55  * directly manage it, while refresh_remote_nodes() populates it via the CIB.
  56  */
  57 GHashTable *crm_remote_peer_cache = NULL;
  58 
  59 /*
  60  * The CIB cluster node cache tracks cluster nodes that have been seen in
  61  * the CIB. It is useful mainly when a caller needs to know about a node that
  62  * may no longer be in the membership, but doesn't want to add the node to the
  63  * main peer cache tables.
  64  */
  65 static GHashTable *cluster_node_cib_cache = NULL;
  66 
  67 unsigned long long crm_peer_seq = 0;
  68 gboolean crm_have_quorum = FALSE;
  69 static bool autoreap = true;
  70 
  71 // Flag setting and clearing for crm_node_t:flags
  72 
  73 #define set_peer_flags(peer, flags_to_set) do {                               \
  74         (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
  75                                            "Peer", (peer)->uname,             \
  76                                            (peer)->flags, (flags_to_set),     \
  77                                            #flags_to_set);                    \
  78     } while (0)
  79 
  80 #define clear_peer_flags(peer, flags_to_clear) do {                           \
  81         (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
  82                                              LOG_TRACE,                       \
  83                                              "Peer", (peer)->uname,           \
  84                                              (peer)->flags, (flags_to_clear), \
  85                                              #flags_to_clear);                \
  86     } while (0)
  87 
  88 static void update_peer_uname(crm_node_t *node, const char *uname);
  89 static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
  90 
  91 /*!
  92  * \internal
  93  * \brief Get the number of Pacemaker Remote nodes that have been seen
  94  *
  95  * \return Number of cached Pacemaker Remote nodes
  96  */
  97 unsigned int
  98 pcmk__cluster_num_remote_nodes(void)
  99 {
 100     if (crm_remote_peer_cache == NULL) {
 101         return 0U;
 102     }
 103     return g_hash_table_size(crm_remote_peer_cache);
 104 }
 105 
 106 /*!
 107  * \internal
 108  * \brief Get a remote node cache entry, creating it if necessary
 109  *
 110  * \param[in] node_name  Name of remote node
 111  *
 112  * \return Cache entry for node on success, or \c NULL (and set \c errno)
 113  *         otherwise
 114  *
 115  * \note When creating a new entry, this will leave the node state undetermined.
 116  *       The caller should also call \c pcmk__update_peer_state() if the state
 117  *       is known.
 118  * \note Because this can add and remove cache entries, callers should not
 119  *       assume any previously obtained cache entry pointers remain valid.
 120  */
 121 crm_node_t *
 122 pcmk__cluster_lookup_remote_node(const char *node_name)
 123 {
 124     crm_node_t *node;
 125     char *node_name_copy = NULL;
 126 
 127     if (node_name == NULL) {
 128         errno = EINVAL;
 129         return NULL;
 130     }
 131 
 132     /* It's theoretically possible that the node was added to the cluster peer
 133      * cache before it was known to be a Pacemaker Remote node. Remove that
 134      * entry unless it has a node ID, which means the name actually is
 135      * associated with a cluster node. (@TODO return an error in that case?)
 136      */
 137     node = pcmk__search_node_caches(0, node_name,
 138                                     pcmk__node_search_cluster_member);
 139     if ((node != NULL) && (node->uuid == NULL)) {
 140         /* node_name could be a pointer into the cache entry being removed, so
 141          * reassign it to a copy before the original gets freed
 142          */
 143         node_name_copy = strdup(node_name);
 144         if (node_name_copy == NULL) {
 145             errno = ENOMEM;
 146             return NULL;
 147         }
 148         node_name = node_name_copy;
 149         pcmk__cluster_forget_cluster_node(0, node_name);
 150     }
 151 
 152     /* Return existing cache entry if one exists */
 153     node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
 154     if (node) {
 155         free(node_name_copy);
 156         return node;
 157     }
 158 
 159     /* Allocate a new entry */
 160     node = calloc(1, sizeof(crm_node_t));
 161     if (node == NULL) {
 162         free(node_name_copy);
 163         return NULL;
 164     }
 165 
 166     /* Populate the essential information */
 167     set_peer_flags(node, crm_remote_node);
 168     node->uuid = strdup(node_name);
 169     if (node->uuid == NULL) {
 170         free(node);
 171         errno = ENOMEM;
 172         free(node_name_copy);
 173         return NULL;
 174     }
 175 
 176     /* Add the new entry to the cache */
 177     g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
 178     crm_trace("added %s to remote cache", node_name);
 179 
 180     /* Update the entry's uname, ensuring peer status callbacks are called */
 181     update_peer_uname(node, node_name);
 182     free(node_name_copy);
 183     return node;
 184 }
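
      /* A minimal usage sketch for pcmk__cluster_lookup_remote_node(): look up
       * (or create) a remote node entry and, if the caller already knows the
       * node is a member, record that state as the note above suggests. The
       * node name "remote-1" is illustrative.
       *
       *     crm_node_t *remote = pcmk__cluster_lookup_remote_node("remote-1");
       *
       *     if (remote == NULL) {
       *         crm_err("Could not cache remote node: %s", strerror(errno));
       *     } else {
       *         pcmk__update_peer_state(__func__, remote, CRM_NODE_MEMBER, 0);
       *     }
       */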
 185 
 186 /*!
 187  * \internal
 188  * \brief Remove a node from the Pacemaker Remote node cache
 189  *
 190  * \param[in] node_name  Name of node to remove from cache
 191  *
 192  * \note The caller must be careful not to use \p node_name after calling this
 193  *       function if it might be a pointer into the cache entry being removed.
 194  */
 195 void
 196 pcmk__cluster_forget_remote_node(const char *node_name)
 197 {
 198     /* Do a lookup first, because node_name could be a pointer within the entry
 199      * being removed -- we can't log it *after* removing it.
 200      */
 201     if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
 202         crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
 203         g_hash_table_remove(crm_remote_peer_cache, node_name);
 204     }
 205 }
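
      /* Sketch of the caveat noted above: if the only copy of the name lives
       * inside the cache entry itself, duplicate it before forgetting the
       * node. The variable names are illustrative.
       *
       *     char *name_copy = pcmk__str_copy(cached_node->uname);
       *
       *     pcmk__cluster_forget_remote_node(name_copy);
       *     crm_debug("Dropped %s from the remote node cache", name_copy);
       *     free(name_copy);
       */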
 206 
 207 /*!
 208  * \internal
 209  * \brief Return node status based on a CIB status entry
 210  *
 211  * \param[in] node_state  XML of node state
 212  *
 213  * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in
 214  *         \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise
 215  * \note Unlike most boolean XML attributes, this one defaults to true, for
 216  *       backward compatibility with older controllers that don't set it.
 217  */
 218 static const char *
 219 remote_state_from_cib(const xmlNode *node_state)
 220 {
 221     bool status = false;
 222 
 223     if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
 224                                 &status) == pcmk_rc_ok) && !status) {
 225         return CRM_NODE_LOST;
 226     } else {
 227         return CRM_NODE_MEMBER;
 228     }
 229 }
 230 
 231 /* user data for looping through remote node xpath searches */
 232 struct refresh_data {
 233     const char *field;  /* XML attribute to check for node name */
 234     gboolean has_state; /* whether to update node state based on XML */
 235 };
 236 
 237 /*!
 238  * \internal
 239  * \brief Process one pacemaker_remote node xpath search result
 240  *
 241  * \param[in] result     XML search result
 242  * \param[in] user_data  what to look for in the XML
 243  */
 244 static void
 245 remote_cache_refresh_helper(xmlNode *result, void *user_data)
 246 {
 247     const struct refresh_data *data = user_data;
 248     const char *remote = crm_element_value(result, data->field);
 249     const char *state = NULL;
 250     crm_node_t *node;
 251 
 252     CRM_CHECK(remote != NULL, return);
 253 
 254     /* Determine node's state, if the result has it */
 255     if (data->has_state) {
 256         state = remote_state_from_cib(result);
 257     }
 258 
 259     /* Check whether cache already has entry for node */
 260     node = g_hash_table_lookup(crm_remote_peer_cache, remote);
 261 
 262     if (node == NULL) {
 263         /* Node is not in cache, so add a new entry for it */
 264         node = pcmk__cluster_lookup_remote_node(remote);
 265         CRM_ASSERT(node);
 266         if (state) {
 267             pcmk__update_peer_state(__func__, node, state, 0);
 268         }
 269 
 270     } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
 271         /* Node is in cache and hasn't been updated already, so mark it clean */
 272         clear_peer_flags(node, crm_node_dirty);
 273         if (state) {
 274             pcmk__update_peer_state(__func__, node, state, 0);
 275         }
 276     }
 277 }
 278 
 279 static void
 280 mark_dirty(gpointer key, gpointer value, gpointer user_data)
 281 {
 282     set_peer_flags((crm_node_t *) value, crm_node_dirty);
 283 }
 284 
 285 static gboolean
 286 is_dirty(gpointer key, gpointer value, gpointer user_data)
 287 {
 288     return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
 289 }
 290 
 291 /*!
 292  * \internal
 293  * \brief Repopulate the remote node cache based on CIB XML
 294  *
 295  * \param[in] cib  CIB XML to parse
 296  */
 297 static void
 298 refresh_remote_nodes(xmlNode *cib)
 299 {
 300     struct refresh_data data;
 301 
 302     pcmk__cluster_init_node_caches();
 303 
 304     /* First, we mark all existing cache entries as dirty,
 305      * so that later we can remove any that weren't in the CIB.
 306      * We don't empty the cache, because we need to detect changes in state.
 307      */
 308     g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
 309 
 310     /* Look for guest nodes and remote nodes in the status section */
 311     data.field = PCMK_XA_ID;
 312     data.has_state = TRUE;
 313     crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
 314                              remote_cache_refresh_helper, &data);
 315 
 316     /* Look for guest nodes and remote nodes in the configuration section,
 317      * because they may have just been added and not have a status entry yet.
 318      * In that case, the cached node state will be left NULL, so that the
 319      * peer status callback isn't called until we're sure the node started
 320      * successfully.
 321      */
 322     data.field = PCMK_XA_VALUE;
 323     data.has_state = FALSE;
 324     crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
 325                              remote_cache_refresh_helper, &data);
 326     data.field = PCMK_XA_ID;
 327     data.has_state = FALSE;
 328     crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
 329                              remote_cache_refresh_helper, &data);
 330 
 331     /* Remove all old cache entries that weren't seen in the CIB */
 332     g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
 333 }
 334 
 335 /*!
 336  * \internal
 337  * \brief Check whether a node is an active cluster node
 338  *
 339  * Remote nodes are never considered active. This guarantees that they can never
 340  * become DC.
 341  *
 342  * \param[in] node  Node to check
 343  *
 344  * \return \c true if the node is an active cluster node, or \c false otherwise
 345  */
 346 bool
 347 pcmk__cluster_is_node_active(const crm_node_t *node)
 348 {
 349     const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
 350 
 351     if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
 352         return false;
 353     }
 354 
 355     switch (cluster_layer) {
 356         case pcmk_cluster_layer_corosync:
 357 #if SUPPORT_COROSYNC
 358             return pcmk__corosync_is_peer_active(node);
 359 #else
 360             break;
 361 #endif  // SUPPORT_COROSYNC
 362         default:
 363             break;
 364     }
 365 
 366     crm_err("Unhandled cluster layer: %s",
 367             pcmk_cluster_layer_text(cluster_layer));
 368     return false;
 369 }
 370 
 371 /*!
 372  * \internal
 373  * \brief Check if a node's entry should be removed from the cluster node cache
 374  *
 375  * A node should be removed from the cache if it's inactive and matches another
 376  * \c crm_node_t (the search object). The node is considered a mismatch if any
 377  * of the following are true:
 378  * * The search object is \c NULL.
 379  * * The search object has an ID set and the cached node's ID does not match it.
 380  * * The search object does not have an ID set, and the cached node's name does
 381  *   not match the search node's name. (If both names are \c NULL, it's a
 382  *   match.)
 383  *
 384  * Otherwise, the node is considered a match.
 385  *
 386  * Note that if the search object has both an ID and a name set, the name is
 387  * ignored for matching purposes.
 388  *
 389  * \param[in] key        Ignored
 390  * \param[in] value      \c crm_node_t object from cluster node cache
 391  * \param[in] user_data  \c crm_node_t object to match against (search object)
 392  *
 393  * \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
 394  *         or \c FALSE otherwise
 395  */
 396 static gboolean
 397 should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
 398 {
 399     crm_node_t *node = value;
 400     crm_node_t *search = user_data;
 401 
 402     if (search == NULL) {
 403         return FALSE;
 404     }
 405     if ((search->id != 0) && (node->id != search->id)) {
 406         return FALSE;
 407     }
 408     if ((search->id == 0)
 409         && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
 410         // @TODO Consider name even if ID is set?
 411         return FALSE;
 412     }
 413     if (pcmk__cluster_is_node_active(value)) {
 414         return FALSE;
 415     }
 416 
 417     crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
 418              "cache",
 419              pcmk__s(node->uname, "(unknown)"), node->id);
 420     return TRUE;
 421 }
 422 
 423 /*!
 424  * \internal
 425  * \brief Remove one or more inactive nodes from the cluster node cache
 426  *
 427  * All inactive nodes matching \p id and \p node_name as described in
 428  * \c should_forget_cluster_node documentation are removed from the cache.
 429  *
 430  * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
 431  * from the cache regardless of ID and name. This differs from clearing the
 432  * cache, in that entries for active nodes are preserved.
 433  *
 434  * \param[in] id         ID of node to remove from cache (0 to ignore)
 435  * \param[in] node_name  Name of node to remove from cache (ignored if \p id is
 436  *                       nonzero)
 437  *
 438  * \note \p node_name is not modified directly, but it will be freed if it's a
 439  *       pointer into a cache entry that is removed.
 440  */
 441 void
 442 pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
 443 {
 444     crm_node_t search = { 0, };
 445     char *criterion = NULL; // For logging
 446     guint matches = 0;
 447 
 448     if (crm_peer_cache == NULL) {
 449         crm_trace("Membership cache not initialized, ignoring removal request");
 450         return;
 451     }
 452 
 453     search.id = id;
 454     search.uname = pcmk__str_copy(node_name);   // May log after original freed
 455 
 456     if (id > 0) {
 457         criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
 458 
 459     } else if (node_name != NULL) {
 460         criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
 461     }
 462 
 463     matches = g_hash_table_foreach_remove(crm_peer_cache,
 464                                           should_forget_cluster_node, &search);
 465     if (matches > 0) {
 466         if (criterion != NULL) {
 467             crm_notice("Removed %u inactive node%s with %s from the membership "
 468                        "cache",
 469                        matches, pcmk__plural_s(matches), criterion);
 470         } else {
 471             crm_notice("Removed all (%u) inactive cluster nodes from the "
 472                        "membership cache",
 473                        matches);
 474         }
 475 
 476     } else {
 477         crm_info("No inactive cluster nodes%s%s to remove from the membership "
 478                  "cache",
 479                  ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
 480     }
 481 
 482     free(search.uname);
 483     free(criterion);
 484 }
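
      /* Usage sketch of the removal criteria described above. The ID and name
       * values are illustrative.
       *
       *     pcmk__cluster_forget_cluster_node(3, NULL);      // by cluster node ID
       *     pcmk__cluster_forget_cluster_node(0, "node-3");  // by node name
       *     pcmk__cluster_forget_cluster_node(0, NULL);      // all inactive nodes
       */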
 485 
 486 static void
 487 count_peer(gpointer key, gpointer value, gpointer user_data)
 488 {
 489     unsigned int *count = user_data;
 490     crm_node_t *node = value;
 491 
 492     if (pcmk__cluster_is_node_active(node)) {
 493         *count = *count + 1;
 494     }
 495 }
 496 
 497 /*!
 498  * \internal
 499  * \brief Get the number of active cluster nodes that have been seen
 500  *
 501  * Remote nodes are never considered active. This guarantees that they can never
 502  * become DC.
 503  *
 504  * \return Number of active nodes in the cluster node cache
 505  */
 506 unsigned int
 507 pcmk__cluster_num_active_nodes(void)
 508 {
 509     unsigned int count = 0;
 510 
 511     if (crm_peer_cache != NULL) {
 512         g_hash_table_foreach(crm_peer_cache, count_peer, &count);
 513     }
 514     return count;
 515 }
 516 
 517 static void
 518 destroy_crm_node(gpointer data)
 519 {
 520     crm_node_t *node = data;
 521 
 522     crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
 523 
 524     free(node->uname);
 525     free(node->state);
 526     free(node->uuid);
 527     free(node->expected);
 528     free(node->conn_host);
 529     free(node);
 530 }
 531 
 532 /*!
 533  * \internal
 534  * \brief Initialize node caches
 535  */
 536 void
 537 pcmk__cluster_init_node_caches(void)
 538 {
 539     if (crm_peer_cache == NULL) {
 540         crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
 541     }
 542 
 543     if (crm_remote_peer_cache == NULL) {
 544         crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
 545     }
 546 
 547     if (cluster_node_cib_cache == NULL) {
 548         cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
 549     }
 550 }
 551 
 552 /*!
 553  * \internal
  554  * \brief Destroy node caches
 555  */
 556 void
 557 pcmk__cluster_destroy_node_caches(void)
 558 {
 559     if (crm_peer_cache != NULL) {
 560         crm_trace("Destroying peer cache with %d members",
 561                   g_hash_table_size(crm_peer_cache));
 562         g_hash_table_destroy(crm_peer_cache);
 563         crm_peer_cache = NULL;
 564     }
 565 
 566     if (crm_remote_peer_cache != NULL) {
 567         crm_trace("Destroying remote peer cache with %d members",
 568                   pcmk__cluster_num_remote_nodes());
 569         g_hash_table_destroy(crm_remote_peer_cache);
 570         crm_remote_peer_cache = NULL;
 571     }
 572 
 573     if (cluster_node_cib_cache != NULL) {
 574         crm_trace("Destroying configured cluster node cache with %d members",
 575                   g_hash_table_size(cluster_node_cib_cache));
 576         g_hash_table_destroy(cluster_node_cib_cache);
 577         cluster_node_cib_cache = NULL;
 578     }
 579 }
 580 
 581 static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
 582                                     const void *) = NULL;
 583 
 584 /*!
 585  * \internal
 586  * \brief Set a client function that will be called after peer status changes
 587  *
 588  * \param[in] dispatch  Pointer to function to use as callback
 589  *
 590  * \note Client callbacks should do only client-specific handling. Callbacks
 591  *       must not add or remove entries in the peer caches.
 592  */
 593 void
 594 pcmk__cluster_set_status_callback(void (*dispatch)(enum crm_status_type,
 595                                                    crm_node_t *, const void *))
 596 {
 597     // @TODO Improve documentation of peer_status_callback
 598     peer_status_callback = dispatch;
 599 }
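
      /* Registration sketch: a client callback that reacts to the status
       * change types dispatched elsewhere in this file (crm_status_uname,
       * crm_status_nstate, crm_status_processes). The handler name is
       * hypothetical; for crm_status_nstate, the callback data is the
       * previous state string.
       *
       *     static void
       *     example_peer_update(enum crm_status_type type, crm_node_t *node,
       *                         const void *data)
       *     {
       *         switch (type) {
       *             case crm_status_nstate:
       *                 crm_info("Node %s is now %s (was %s)",
       *                          node->uname, node->state,
       *                          pcmk__s((const char *) data, "unknown"));
       *                 break;
       *             default:  // Client-specific handling only; don't touch caches
       *                 break;
       *         }
       *     }
       *
       *     pcmk__cluster_set_status_callback(&example_peer_update);
       */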
 600 
 601 /*!
 602  * \internal
 603  * \brief Tell the library whether to automatically reap lost nodes
 604  *
 605  * If \c true (the default), calling \c crm_update_peer_proc() will also update
 606  * the peer state to \c CRM_NODE_MEMBER or \c CRM_NODE_LOST, and updating the
 607  * peer state will reap peers whose state changes to anything other than
 608  * \c CRM_NODE_MEMBER.
 609  *
 610  * Callers should leave this enabled unless they plan to manage the cache
 611  * separately on their own.
 612  *
 613  * \param[in] enable  \c true to enable automatic reaping, \c false to disable
 614  */
 615 void
 616 pcmk__cluster_set_autoreap(bool enable)
 617 {
 618     autoreap = enable;
 619 }
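
      /* Sketch: a caller that manages the membership cache on its own would
       * disable automatic reaping right after initializing the caches.
       *
       *     pcmk__cluster_init_node_caches();
       *     pcmk__cluster_set_autoreap(false);
       */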
 620 
 621 static void
 622 dump_peer_hash(int level, const char *caller)
 623 {
 624     GHashTableIter iter;
 625     const char *id = NULL;
 626     crm_node_t *node = NULL;
 627 
 628     g_hash_table_iter_init(&iter, crm_peer_cache);
 629     while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
 630         do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
 631     }
 632 }
 633 
 634 static gboolean
 635 hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
 636 {
 637     return value == user_data;
 638 }
 639 
 640 /*!
 641  * \internal
 642  * \brief Search cluster member node cache
 643  *
 644  * \param[in] id     If not 0, cluster node ID to search for
 645  * \param[in] uname  If not NULL, node name to search for
 646  * \param[in] uuid   If not NULL while id is 0, node UUID instead of cluster
 647  *                   node ID to search for
 648  *
 649  * \return Cluster node cache entry if found, otherwise NULL
 650  */
 651 static crm_node_t *
 652 search_cluster_member_cache(unsigned int id, const char *uname,
 653                             const char *uuid)
 654 {
 655     GHashTableIter iter;
 656     crm_node_t *node = NULL;
 657     crm_node_t *by_id = NULL;
 658     crm_node_t *by_name = NULL;
 659 
 660     CRM_ASSERT(id > 0 || uname != NULL);
 661 
 662     pcmk__cluster_init_node_caches();
 663 
 664     if (uname != NULL) {
 665         g_hash_table_iter_init(&iter, crm_peer_cache);
 666         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 667             if(node->uname && strcasecmp(node->uname, uname) == 0) {
 668                 crm_trace("Name match: %s = %p", node->uname, node);
 669                 by_name = node;
 670                 break;
 671             }
 672         }
 673     }
 674 
 675     if (id > 0) {
 676         g_hash_table_iter_init(&iter, crm_peer_cache);
 677         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 678             if(node->id == id) {
 679                 crm_trace("ID match: %u = %p", node->id, node);
 680                 by_id = node;
 681                 break;
 682             }
 683         }
 684 
 685     } else if (uuid != NULL) {
 686         g_hash_table_iter_init(&iter, crm_peer_cache);
 687         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 688             if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
 689                 crm_trace("UUID match: %s = %p", node->uuid, node);
 690                 by_id = node;
 691                 break;
 692             }
 693         }
 694     }
 695 
 696     node = by_id; /* Good default */
 697     if(by_id == by_name) {
 698         /* Nothing to do if they match (both NULL counts) */
 699         crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
 700 
 701     } else if(by_id == NULL && by_name) {
 702         crm_trace("Only one: %p for %u/%s", by_name, id, uname);
 703 
 704         if(id && by_name->id) {
 705             dump_peer_hash(LOG_WARNING, __func__);
 706             crm_crit("Node %u and %u share the same name '%s'",
 707                      id, by_name->id, uname);
 708             node = NULL; /* Create a new one */
 709 
 710         } else {
 711             node = by_name;
 712         }
 713 
 714     } else if(by_name == NULL && by_id) {
 715         crm_trace("Only one: %p for %u/%s", by_id, id, uname);
 716 
 717         if(uname && by_id->uname) {
 718             dump_peer_hash(LOG_WARNING, __func__);
 719             crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
 720                      uname, by_id->uname, id, uname);
 721         }
 722 
 723     } else if(uname && by_id->uname) {
 724         if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
 725             crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
 726             g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
 727 
 728         } else {
 729             crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
 730             dump_peer_hash(LOG_INFO, __func__);
 731             crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
 732                       TRUE);
 733         }
 734 
 735     } else if(id && by_name->id) {
 736         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
 737 
 738     } else {
 739         /* Simple merge */
 740 
 741         /* Only corosync-based clusters use node IDs. The functions that call
 742          * pcmk__update_peer_state() and crm_update_peer_proc() only know
 743          * nodeid, so 'by_id' is authoritative when merging.
 744          */
 745         dump_peer_hash(LOG_DEBUG, __func__);
 746 
 747         crm_info("Merging %p into %p", by_name, by_id);
 748         g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
 749     }
 750 
 751     return node;
 752 }
 753 
 754 /*!
 755  * \internal
 756  * \brief Search caches for a node (cluster or Pacemaker Remote)
 757  *
 758  * \param[in] id     If not 0, cluster node ID to search for
 759  * \param[in] uname  If not NULL, node name to search for
 760  * \param[in] flags  Group of enum pcmk__node_search_flags
 761  *
 762  * \return Node cache entry if found, otherwise NULL
 763  */
 764 crm_node_t *
 765 pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
 766 {
 767     crm_node_t *node = NULL;
 768 
 769     CRM_ASSERT(id > 0 || uname != NULL);
 770 
 771     pcmk__cluster_init_node_caches();
 772 
 773     if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
 774         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
 775     }
 776 
 777     if ((node == NULL)
 778         && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
 779 
 780         node = search_cluster_member_cache(id, uname, NULL);
 781     }
 782 
 783     if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
 784         char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
 785 
 786         node = find_cib_cluster_node(id_str, uname);
 787         free(id_str);
 788     }
 789 
 790     return node;
 791 }
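
      /* Search sketch: look for a node by name across the remote cache, the
       * cluster member cache, and the CIB-based cache, in the order the flags
       * are checked above. The node name is illustrative.
       *
       *     crm_node_t *node =
       *         pcmk__search_node_caches(0, "node-1",
       *                                  pcmk__node_search_remote
       *                                  |pcmk__node_search_cluster_member
       *                                  |pcmk__node_search_cluster_cib);
       */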
 792 
 793 /*!
 794  * \internal
 795  * \brief Purge a node from cache (both cluster and Pacemaker Remote)
 796  *
 797  * \param[in] node_name  If not NULL, purge only nodes with this name
 798  * \param[in] node_id    If not 0, purge cluster nodes only if they have this ID
 799  *
 800  * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
 801  *       If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
 802  *       nodes that match \p node_name will be purged, and cluster nodes that
 803  *       match both \p node_name and \p node_id will be purged.
 804  * \note The caller must be careful not to use \p node_name after calling this
 805  *       function if it might be a pointer into a cache entry being removed.
 806  */
 807 void
 808 pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
 809 {
 810     char *node_name_copy = NULL;
 811 
 812     if ((node_name == NULL) && (node_id == 0U)) {
 813         return;
 814     }
 815 
 816     // Purge from Pacemaker Remote node cache
 817     if ((node_name != NULL)
 818         && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
 819         /* node_name could be a pointer into the cache entry being purged,
 820          * so reassign it to a copy before the original gets freed
 821          */
 822         node_name_copy = pcmk__str_copy(node_name);
 823         node_name = node_name_copy;
 824 
 825         crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
 826         g_hash_table_remove(crm_remote_peer_cache, node_name);
 827     }
 828 
 829     pcmk__cluster_forget_cluster_node(node_id, node_name);
 830     free(node_name_copy);
 831 }
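
      /* Purge sketch, following the note above: if the name will be used after
       * the call, pass a caller-owned copy rather than a pointer that might
       * live inside a cache entry. The name and ID are illustrative.
       *
       *     char *name = pcmk__str_copy("node-2");
       *
       *     pcmk__purge_node_from_cache(name, 2);
       *     free(name);
       */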
 832 
 833 #if SUPPORT_COROSYNC
 834 static guint
 835 remove_conflicting_peer(crm_node_t *node)
 836 {
 837     int matches = 0;
 838     GHashTableIter iter;
 839     crm_node_t *existing_node = NULL;
 840 
 841     if (node->id == 0 || node->uname == NULL) {
 842         return 0;
 843     }
 844 
 845     if (!pcmk__corosync_has_nodelist()) {
 846         return 0;
 847     }
 848 
 849     g_hash_table_iter_init(&iter, crm_peer_cache);
 850     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
 851         if (existing_node->id > 0
 852             && existing_node->id != node->id
 853             && existing_node->uname != NULL
 854             && strcasecmp(existing_node->uname, node->uname) == 0) {
 855 
 856             if (pcmk__cluster_is_node_active(existing_node)) {
 857                 continue;
 858             }
 859 
 860             crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
 861                      existing_node->id, existing_node->uname, node->id);
 862 
 863             g_hash_table_iter_remove(&iter);
 864             matches++;
 865         }
 866     }
 867 
 868     return matches;
 869 }
 870 #endif
 871 
 872 /*!
 873  * \internal
 874  * \brief Get a cluster node cache entry, possibly creating one if not found
 875  *
 876  * If \c pcmk__node_search_cluster_member is set in \p flags, the return value
 877  * is guaranteed not to be \c NULL. A new cache entry is created if one does not
 878  * already exist.
 879  *
 880  * \param[in] id     If not 0, cluster node ID to search for
 881  * \param[in] uname  If not NULL, node name to search for
 882  * \param[in] uuid   If not NULL while id is 0, node UUID instead of cluster
 883  *                   node ID to search for
 884  * \param[in] flags  Group of enum pcmk__node_search_flags
 885  *
 886  * \return (Possibly newly created) cluster node cache entry
 887  */
 888 /* coverity[-alloc] Memory is referenced in one or both hashtables */
 889 crm_node_t *
 890 pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
 891                uint32_t flags)
 892 {
 893     crm_node_t *node = NULL;
 894     char *uname_lookup = NULL;
 895 
 896     CRM_ASSERT(id > 0 || uname != NULL);
 897 
 898     pcmk__cluster_init_node_caches();
 899 
 900     // Check the Pacemaker Remote node cache first
 901     if (pcmk_is_set(flags, pcmk__node_search_remote)) {
 902         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
 903         if (node != NULL) {
 904             return node;
 905         }
 906     }
 907 
 908     if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
 909         return NULL;
 910     }
 911 
 912     node = search_cluster_member_cache(id, uname, uuid);
 913 
  914     /* If uname wasn't provided and the cache search didn't find a name by
  915      * ID, look up the node name using the ID in the cluster membership. */
 916     if ((node == NULL || node->uname == NULL) && (uname == NULL)) { 
 917         uname_lookup = pcmk__cluster_node_name(id);
 918     }
 919 
 920     if (uname_lookup) {
 921         uname = uname_lookup;
 922         crm_trace("Inferred a name of '%s' for node %u", uname, id);
 923 
 924         /* try to turn up the node one more time now that we know the uname. */
 925         if (node == NULL) {
 926             node = search_cluster_member_cache(id, uname, uuid);
 927         }
 928     }
 929 
 930     if (node == NULL) {
 931         char *uniqueid = crm_generate_uuid();
 932 
 933         node = pcmk__assert_alloc(1, sizeof(crm_node_t));
 934 
 935         crm_info("Created entry %s/%p for node %s/%u (%d total)",
 936                  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
 937         g_hash_table_replace(crm_peer_cache, uniqueid, node);
 938     }
 939 
 940     if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
 941         crm_info("Node %u is now known as %s", id, uname);
 942     }
 943 
 944     if(id > 0 && node->id == 0) {
 945         node->id = id;
 946     }
 947 
 948     if (uname && (node->uname == NULL)) {
 949         update_peer_uname(node, uname);
 950     }
 951 
 952     if(node->uuid == NULL) {
 953         if (uuid == NULL) {
 954             uuid = pcmk__cluster_node_uuid(node);
 955         }
 956 
 957         if (uuid) {
 958             crm_info("Node %u has uuid %s", id, uuid);
 959 
 960         } else {
 961             crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
 962         }
 963     }
 964 
 965     free(uname_lookup);
 966 
 967     return node;
 968 }
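
      /* Lookup-or-create sketch: with pcmk__node_search_cluster_member set,
       * the return value is guaranteed non-NULL per the note above, so the
       * entry can be used immediately. The ID and name are illustrative.
       *
       *     crm_node_t *peer = pcmk__get_node(1, "node-1", NULL,
       *                                       pcmk__node_search_cluster_member);
       *
       *     crm_info("Cached peer %s has ID %u", peer->uname, peer->id);
       */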
 969 
 970 /*!
 971  * \internal
 972  * \brief Update a node's uname
 973  *
 974  * \param[in,out] node   Node object to update
 975  * \param[in]     uname  New name to set
 976  *
 977  * \note This function should not be called within a peer cache iteration,
 978  *       because in some cases it can remove conflicting cache entries,
 979  *       which would invalidate the iterator.
 980  */
 981 static void
 982 update_peer_uname(crm_node_t *node, const char *uname)
 983 {
 984     CRM_CHECK(uname != NULL,
 985               crm_err("Bug: can't update node name without name"); return);
 986     CRM_CHECK(node != NULL,
 987               crm_err("Bug: can't update node name to %s without node", uname);
 988               return);
 989 
 990     if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
 991         crm_debug("Node uname '%s' did not change", uname);
 992         return;
 993     }
 994 
 995     for (const char *c = uname; *c; ++c) {
 996         if ((*c >= 'A') && (*c <= 'Z')) {
 997             crm_warn("Node names with capitals are discouraged, consider changing '%s'",
 998                      uname);
 999             break;
1000         }
1001     }
1002 
1003     pcmk__str_update(&node->uname, uname);
1004 
1005     if (peer_status_callback != NULL) {
1006         peer_status_callback(crm_status_uname, node, NULL);
1007     }
1008 
1009 #if SUPPORT_COROSYNC
1010     if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
1011         && !pcmk_is_set(node->flags, crm_remote_node)) {
1012 
1013         remove_conflicting_peer(node);
1014     }
1015 #endif
1016 }
1017 
1018 /*!
1019  * \internal
1020  * \brief Get log-friendly string equivalent of a process flag
1021  *
1022  * \param[in] proc  Process flag
1023  *
1024  * \return Log-friendly string equivalent of \p proc
1025  */
1026 static inline const char *
1027 proc2text(enum crm_proc_flag proc)
1028 {
1029     const char *text = "unknown";
1030 
1031     switch (proc) {
1032         case crm_proc_none:
1033             text = "none";
1034             break;
1035         case crm_proc_cpg:
1036             text = "corosync-cpg";
1037             break;
1038     }
1039     return text;
1040 }
1041 
1042 /*!
1043  * \internal
1044  * \brief Update a node's process information (and potentially state)
1045  *
1046  * \param[in]     source  Caller's function name (for log messages)
1047  * \param[in,out] node    Node object to update
1048  * \param[in]     flag    Bitmask of new process information
 1049  * \param[in]     status  Node status (online, offline, etc.)
1050  *
1051  * \return NULL if any node was reaped from peer caches, value of node otherwise
1052  *
1053  * \note If this function returns NULL, the supplied node object was likely
1054  *       freed and should not be used again. This function should not be
1055  *       called within a cache iteration if reaping is possible, otherwise
1056  *       reaping could invalidate the iterator.
1057  */
1058 crm_node_t *
1059 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
1060 {
1061     uint32_t last = 0;
1062     gboolean changed = FALSE;
1063 
1064     CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
1065                                     source, proc2text(flag), status);
1066                             return NULL);
1067 
1068     /* Pacemaker doesn't spawn processes on remote nodes */
1069     if (pcmk_is_set(node->flags, crm_remote_node)) {
1070         return node;
1071     }
1072 
1073     last = node->processes;
1074     if (status == NULL) {
1075         node->processes = flag;
1076         if (node->processes != last) {
1077             changed = TRUE;
1078         }
1079 
1080     } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1081         if ((node->processes & flag) != flag) {
1082             node->processes = pcmk__set_flags_as(__func__, __LINE__,
1083                                                  LOG_TRACE, "Peer process",
1084                                                  node->uname, node->processes,
1085                                                  flag, "processes");
1086             changed = TRUE;
1087         }
1088 
1089     } else if (node->processes & flag) {
1090         node->processes = pcmk__clear_flags_as(__func__, __LINE__,
1091                                                LOG_TRACE, "Peer process",
1092                                                node->uname, node->processes,
1093                                                flag, "processes");
1094         changed = TRUE;
1095     }
1096 
1097     if (changed) {
1098         if (status == NULL && flag <= crm_proc_none) {
1099             crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
1100                      node->id);
1101         } else {
1102             crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
1103                      proc2text(flag), status);
1104         }
1105 
1106         if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1107             node->when_online = time(NULL);
1108 
1109         } else {
1110             node->when_online = 0;
1111         }
1112 
1113         /* Call the client callback first, then update the peer state,
1114          * in case the node will be reaped
1115          */
1116         if (peer_status_callback != NULL) {
1117             peer_status_callback(crm_status_processes, node, &last);
1118         }
1119 
1120         /* The client callback shouldn't touch the peer caches,
1121          * but as a safety net, bail if the peer cache was destroyed.
1122          */
1123         if (crm_peer_cache == NULL) {
1124             return NULL;
1125         }
1126 
1127         if (autoreap) {
1128             const char *peer_state = NULL;
1129 
1130             if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1131                 peer_state = CRM_NODE_MEMBER;
1132             } else {
1133                 peer_state = CRM_NODE_LOST;
1134             }
1135             node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1136         }
1137     } else {
1138         crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
1139                   proc2text(flag), status);
1140     }
1141     return node;
1142 }
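
      /* Sketch of the NULL-return caveat above: if autoreap removed the node,
       * stop using the pointer. The CPG-join context is illustrative.
       *
       *     node = crm_update_peer_proc(__func__, node, crm_proc_cpg,
       *                                 PCMK_VALUE_ONLINE);
       *     if (node == NULL) {
       *         return;  // Node was reaped; the old pointer is no longer valid
       *     }
       */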
1143 
1144 /*!
1145  * \internal
1146  * \brief Update a cluster node cache entry's expected join state
1147  *
1148  * \param[in]     source    Caller's function name (for logging)
1149  * \param[in,out] node      Node to update
1150  * \param[in]     expected  Node's new join state
1151  */
1152 void
1153 pcmk__update_peer_expected(const char *source, crm_node_t *node,
1154                            const char *expected)
1155 {
1156     char *last = NULL;
1157     gboolean changed = FALSE;
1158 
1159     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
1160               return);
1161 
1162     /* Remote nodes don't participate in joins */
1163     if (pcmk_is_set(node->flags, crm_remote_node)) {
1164         return;
1165     }
1166 
1167     last = node->expected;
1168     if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1169         node->expected = strdup(expected);
1170         changed = TRUE;
1171     }
1172 
1173     if (changed) {
1174         crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
1175                  expected, last);
1176         free(last);
1177     } else {
1178         crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
1179                   node->id, expected);
1180     }
1181 }
1182 
1183 /*!
1184  * \internal
1185  * \brief Update a node's state and membership information
1186  *
1187  * \param[in]     source      Caller's function name (for log messages)
1188  * \param[in,out] node        Node object to update
1189  * \param[in]     state       Node's new state
1190  * \param[in]     membership  Node's new membership ID
1191  * \param[in,out] iter        If not NULL, pointer to node's peer cache iterator
1192  *
1193  * \return NULL if any node was reaped, value of node otherwise
1194  *
1195  * \note If this function returns NULL, the supplied node object was likely
1196  *       freed and should not be used again. This function may be called from
1197  *       within a peer cache iteration if the iterator is supplied.
1198  */
1199 static crm_node_t *
1200 update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
1201                        uint64_t membership, GHashTableIter *iter)
1202 {
1203     gboolean is_member;
1204 
1205     CRM_CHECK(node != NULL,
1206               crm_err("Could not set state for unknown host to %s"
1207                       CRM_XS " source=%s", state, source);
1208               return NULL);
1209 
1210     is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
1211     if (is_member) {
1212         node->when_lost = 0;
1213         if (membership) {
1214             node->last_seen = membership;
1215         }
1216     }
1217 
1218     if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1219         char *last = node->state;
1220 
1221         if (is_member) {
1222              node->when_member = time(NULL);
1223 
1224         } else {
1225              node->when_member = 0;
1226         }
1227 
1228         node->state = strdup(state);
1229         crm_notice("Node %s state is now %s " CRM_XS
1230                    " nodeid=%u previous=%s source=%s", node->uname, state,
1231                    node->id, (last? last : "unknown"), source);
1232         if (peer_status_callback != NULL) {
1233             peer_status_callback(crm_status_nstate, node, last);
1234         }
1235         free(last);
1236 
1237         if (autoreap && !is_member
1238             && !pcmk_is_set(node->flags, crm_remote_node)) {
1239             /* We only autoreap from the peer cache, not the remote peer cache,
1240              * because the latter should be managed only by
1241              * refresh_remote_nodes().
1242              */
1243             if(iter) {
1244                 crm_notice("Purged 1 peer with " PCMK_XA_ID
1245                            "=%u and/or uname=%s from the membership cache",
1246                            node->id, node->uname);
1247                 g_hash_table_iter_remove(iter);
1248 
1249             } else {
1250                 pcmk__cluster_forget_cluster_node(node->id, node->uname);
1251             }
1252             node = NULL;
1253         }
1254 
1255     } else {
1256         crm_trace("Node %s state is unchanged (%s) " CRM_XS
1257                   " nodeid=%u source=%s", node->uname, state, node->id, source);
1258     }
1259     return node;
1260 }
1261 
1262 /*!
1263  * \brief Update a node's state and membership information
1264  *
1265  * \param[in]     source      Caller's function name (for log messages)
1266  * \param[in,out] node        Node object to update
1267  * \param[in]     state       Node's new state
1268  * \param[in]     membership  Node's new membership ID
1269  *
1270  * \return NULL if any node was reaped, value of node otherwise
1271  *
1272  * \note If this function returns NULL, the supplied node object was likely
1273  *       freed and should not be used again. This function should not be
1274  *       called within a cache iteration if reaping is possible,
1275  *       otherwise reaping could invalidate the iterator.
1276  */
1277 crm_node_t *
1278 pcmk__update_peer_state(const char *source, crm_node_t *node,
1279                         const char *state, uint64_t membership)
1280 {
1281     return update_peer_state_iter(source, node, state, membership, NULL);
1282 }
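
      /* Sketch: mark a node lost outside of any cache iteration and honor the
       * NULL return, which means the entry may have been reaped and freed.
       *
       *     node = pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
       *     if (node == NULL) {
       *         crm_trace("Node entry was reaped while marking it lost");
       *     }
       */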
1283 
1284 /*!
1285  * \internal
1286  * \brief Reap all nodes from cache whose membership information does not match
1287  *
1288  * \param[in] membership  Membership ID of nodes to keep
1289  */
1290 void
1291 pcmk__reap_unseen_nodes(uint64_t membership)
1292 {
1293     GHashTableIter iter;
1294     crm_node_t *node = NULL;
1295 
1296     crm_trace("Reaping unseen nodes...");
1297     g_hash_table_iter_init(&iter, crm_peer_cache);
1298     while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1299         if (node->last_seen != membership) {
1300             if (node->state) {
1301                 /*
1302                  * Calling update_peer_state_iter() allows us to
1303                  * remove the node from crm_peer_cache without
1304                  * invalidating our iterator
1305                  */
1306                 update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1307                                            membership, &iter);
1308 
1309             } else {
1310                 crm_info("State of node %s[%u] is still unknown",
1311                          node->uname, node->id);
1312             }
1313         }
1314     }
1315 }
1316 
1317 static crm_node_t *
1318 find_cib_cluster_node(const char *id, const char *uname)
1319 {
1320     GHashTableIter iter;
1321     crm_node_t *node = NULL;
1322     crm_node_t *by_id = NULL;
1323     crm_node_t *by_name = NULL;
1324 
1325     if (uname) {
1326         g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1327         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1328             if (node->uname && strcasecmp(node->uname, uname) == 0) {
1329                 crm_trace("Name match: %s = %p", node->uname, node);
1330                 by_name = node;
1331                 break;
1332             }
1333         }
1334     }
1335 
1336     if (id) {
1337         g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1338         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1339             if(strcasecmp(node->uuid, id) == 0) {
1340                 crm_trace("ID match: %s= %p", id, node);
1341                 by_id = node;
1342                 break;
1343             }
1344         }
1345     }
1346 
1347     node = by_id; /* Good default */
1348     if (by_id == by_name) {
1349         /* Nothing to do if they match (both NULL counts) */
1350         crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1351 
1352     } else if (by_id == NULL && by_name) {
1353         crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1354 
1355         if (id) {
1356             node = NULL;
1357 
1358         } else {
1359             node = by_name;
1360         }
1361 
1362     } else if (by_name == NULL && by_id) {
1363         crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1364 
1365         if (uname) {
1366             node = NULL;
1367         }
1368 
1369     } else if (uname && by_id->uname
1370                && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1371         /* Multiple nodes have the same uname in the CIB.
1372          * Return by_id. */
1373 
1374     } else if (id && by_name->uuid
1375                && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1376         /* Multiple nodes have the same id in the CIB.
1377          * Return by_name. */
1378         node = by_name;
1379 
1380     } else {
1381         node = NULL;
1382     }
1383 
1384     if (node == NULL) {
1385         crm_debug("Couldn't find node%s%s%s%s",
1386                    id? " " : "",
1387                    id? id : "",
1388                    uname? " with name " : "",
1389                    uname? uname : "");
1390     }
1391 
1392     return node;
1393 }
1394 
1395 static void
1396 cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1397 {
1398     const char *id = crm_element_value(xml_node, PCMK_XA_ID);
1399     const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
1400     crm_node_t * node =  NULL;
1401 
1402     CRM_CHECK(id != NULL && uname !=NULL, return);
1403     node = find_cib_cluster_node(id, uname);
1404 
1405     if (node == NULL) {
1406         char *uniqueid = crm_generate_uuid();
1407 
1408         node = pcmk__assert_alloc(1, sizeof(crm_node_t));
1409 
1410         node->uname = pcmk__str_copy(uname);
1411         node->uuid = pcmk__str_copy(id);
1412 
1413         g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
1414 
1415     } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1416         pcmk__str_update(&node->uname, uname);
1417 
1418         /* Node is in cache and hasn't been updated already, so mark it clean */
1419         clear_peer_flags(node, crm_node_dirty);
1420     }
1421 
1422 }
1423 
1424 static void
1425 refresh_cluster_node_cib_cache(xmlNode *cib)
1426 {
1427     pcmk__cluster_init_node_caches();
1428 
1429     g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
1430 
1431     crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
1432                              cluster_node_cib_cache_refresh_helper, NULL);
1433 
1434     // Remove all old cache entries that weren't seen in the CIB
1435     g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
1436 }
1437 
1438 void
1439 pcmk__refresh_node_caches_from_cib(xmlNode *cib)
1440 {
1441     refresh_remote_nodes(cib);
1442     refresh_cluster_node_cib_cache(cib);
1443 }
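
      /* Refresh sketch: callers pass the root of a CIB copy they already hold;
       * both the remote node cache and the CIB-based cluster node cache are
       * rebuilt from it. The variable name is illustrative.
       *
       *     xmlNode *cib_copy = ...;   // current CIB, obtained elsewhere
       *
       *     pcmk__refresh_node_caches_from_cib(cib_copy);
       */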
1444 
1445 // Deprecated functions kept only for backward API compatibility
1446 // LCOV_EXCL_START
1447 
1448 #include <crm/cluster/compat.h>
1449 
1450 int
1451 crm_terminate_member(int nodeid, const char *uname, void *unused)
1452 {
1453     return stonith_api_kick(nodeid, uname, 120, TRUE);
1454 }
1455 
1456 int
1457 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1458 {
1459     return stonith_api_kick(nodeid, uname, 120, TRUE);
1460 }
1461 
1462 crm_node_t *
1463 crm_get_peer(unsigned int id, const char *uname)
1464 {
1465     return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster_member);
1466 }
1467 
1468 crm_node_t *
1469 crm_get_peer_full(unsigned int id, const char *uname, int flags)
1470 {
1471     return pcmk__get_node(id, uname, NULL, flags);
1472 }
1473 
1474 int
1475 crm_remote_peer_cache_size(void)
1476 {
1477     unsigned int count = pcmk__cluster_num_remote_nodes();
1478 
1479     return QB_MIN(count, INT_MAX);
1480 }
1481 
1482 void
1483 crm_remote_peer_cache_refresh(xmlNode *cib)
1484 {
1485     refresh_remote_nodes(cib);
1486 }
1487 
1488 crm_node_t *
1489 crm_remote_peer_get(const char *node_name)
1490 {
1491     return pcmk__cluster_lookup_remote_node(node_name);
1492 }
1493 
1494 void
1495 crm_remote_peer_cache_remove(const char *node_name)
1496 {
1497     pcmk__cluster_forget_remote_node(node_name);
1498 }
1499 
1500 gboolean
1501 crm_is_peer_active(const crm_node_t * node)
1502 {
1503     return pcmk__cluster_is_node_active(node);
1504 }
1505 
1506 guint
1507 crm_active_peers(void)
1508 {
1509     return pcmk__cluster_num_active_nodes();
1510 }
1511 
1512 guint
1513 reap_crm_member(uint32_t id, const char *name)
1514 {
1515     int matches = 0;
1516     crm_node_t search = { 0, };
1517 
1518     if (crm_peer_cache == NULL) {
1519         crm_trace("Membership cache not initialized, ignoring purge request");
1520         return 0;
1521     }
1522 
1523     search.id = id;
1524     search.uname = pcmk__str_copy(name);
1525     matches = g_hash_table_foreach_remove(crm_peer_cache,
1526                                           should_forget_cluster_node, &search);
1527     if(matches) {
1528         crm_notice("Purged %d peer%s with " PCMK_XA_ID
1529                    "=%u%s%s from the membership cache",
1530                    matches, pcmk__plural_s(matches), search.id,
1531                    (search.uname? " and/or uname=" : ""),
1532                    (search.uname? search.uname : ""));
1533 
1534     } else {
1535         crm_info("No peers with " PCMK_XA_ID
1536                  "=%u%s%s to purge from the membership cache",
1537                  search.id, (search.uname? " and/or uname=" : ""),
1538                  (search.uname? search.uname : ""));
1539     }
1540 
1541     free(search.uname);
1542     return matches;
1543 }
1544 
1545 void
1546 crm_peer_init(void)
1547 {
1548     pcmk__cluster_init_node_caches();
1549 }
1550 
1551 void
1552 crm_peer_destroy(void)
1553 {
1554     pcmk__cluster_destroy_node_caches();
1555 }
1556 
1557 void
1558 crm_set_autoreap(gboolean enable)
1559 {
1560     pcmk__cluster_set_autoreap(enable);
1561 }
1562 
1563 void
1564 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
1565 {
1566     pcmk__cluster_set_status_callback(dispatch);
1567 }
1568 
1569 // LCOV_EXCL_STOP
1570 // End deprecated API
