root/lib/cluster/membership.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. crm_remote_peer_cache_size
  2. crm_remote_peer_get
  3. crm_remote_peer_cache_add
  4. crm_remote_peer_cache_remove
  5. remote_state_from_cib
  6. remote_cache_refresh_helper
  7. mark_dirty
  8. is_dirty
  9. crm_remote_peer_cache_refresh
  10. crm_is_peer_active
  11. crm_reap_dead_member
  12. reap_crm_member
  13. crm_count_peer
  14. crm_active_peers
  15. destroy_crm_node
  16. crm_peer_init
  17. crm_peer_destroy
  18. crm_set_status_callback
  19. crm_set_autoreap
  20. crm_dump_peer_hash
  21. crm_hash_find_by_data
  22. crm_find_peer_full
  23. crm_get_peer_full
  24. crm_find_peer
  25. crm_remove_conflicting_peer
  26. crm_get_peer
  27. crm_update_peer
  28. crm_update_peer_uname
  29. crm_update_peer_proc
  30. crm_update_peer_expected
  31. crm_update_peer_state_iter
  32. crm_update_peer_state
  33. crm_reap_unseen_nodes
  34. crm_terminate_member
  35. crm_terminate_member_no_mainloop

   1 /*
   2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include <crm_internal.h>
  19 
  20 #ifndef _GNU_SOURCE
  21 #  define _GNU_SOURCE
  22 #endif
  23 
  24 #include <sys/param.h>
  25 #include <sys/types.h>
  26 #include <stdio.h>
  27 #include <unistd.h>
  28 #include <string.h>
  29 #include <glib.h>
  30 #include <crm/common/ipc.h>
  31 #include <crm/cluster/internal.h>
  32 #include <crm/msg_xml.h>
  33 #include <crm/stonith-ng.h>
  34 
  35 #define s_if_plural(i) (((i) == 1)? "" : "s")
  36 
  37 /* The peer cache remembers cluster nodes that have been seen.
  38  * This is managed mostly automatically by libcluster, based on
  39  * cluster membership events.
  40  *
  41  * Because cluster nodes can have conflicting names or UUIDs,
  42  * the hash table key is a uniquely generated ID.
  43  */
  44 GHashTable *crm_peer_cache = NULL;
  45 
  46 /*
  47  * The remote peer cache tracks pacemaker_remote nodes. While the
  48  * value has the same type as the peer cache's, it is tracked separately for
  49  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
  50  * so the name (which is also the UUID) is used as the hash table key; there
  51  * is no equivalent of membership events, so management is not automatic; and
  52  * most users of the peer cache need to exclude pacemaker_remote nodes.
  53  *
  54  * That said, using a single cache would be more logical and less error-prone,
  55  * so it would be a good idea to merge them one day.
  56  *
  57  * libcluster provides two avenues for populating the cache:
  58  * crm_remote_peer_get(), crm_remote_peer_cache_add() and
  59  * crm_remote_peer_cache_remove() directly manage it,
  60  * while crm_remote_peer_cache_refresh() populates it via the CIB.
  61  */
  62 GHashTable *crm_remote_peer_cache = NULL;
  63 
  64 unsigned long long crm_peer_seq = 0;
  65 gboolean crm_have_quorum = FALSE;
  66 static gboolean crm_autoreap  = TRUE;
  67 
  68 int
  69 crm_remote_peer_cache_size(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  70 {
  71     if (crm_remote_peer_cache == NULL) {
  72         return 0;
  73     }
  74     return g_hash_table_size(crm_remote_peer_cache);
  75 }
  76 
  77 /*!
  78  * \brief Get a remote node peer cache entry, creating it if necessary
  79  *
  80  * \param[in] node_name  Name of remote node
  81  *
  82  * \return Cache entry for node on success, NULL (and set errno) otherwise
  83  *
  84  * \note When creating a new entry, this will leave the node state undetermined,
  85  *       so the caller should also call crm_update_peer_state() if the state is
  86  *       known.
  87  */
  88 crm_node_t *
  89 crm_remote_peer_get(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
  90 {
  91     crm_node_t *node;
  92 
  93     if (node_name == NULL) {
  94         errno = -EINVAL;
  95         return NULL;
  96     }
  97 
  98     /* Return existing cache entry if one exists */
  99     node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
 100     if (node) {
 101         return node;
 102     }
 103 
 104     /* Allocate a new entry */
 105     node = calloc(1, sizeof(crm_node_t));
 106     if (node == NULL) {
 107         return NULL;
 108     }
 109 
 110     /* Populate the essential information */
 111     node->flags = crm_remote_node;
 112     node->uuid = strdup(node_name);
 113     if (node->uuid == NULL) {
 114         free(node);
 115         errno = -ENOMEM;
 116         return NULL;
 117     }
 118 
 119     /* Add the new entry to the cache */
 120     g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
 121     crm_trace("added %s to remote cache", node_name);
 122 
 123     /* Update the entry's uname, ensuring peer status callbacks are called */
 124     crm_update_peer_uname(node, node_name);
 125     return node;
 126 }
 127 
 128 /*!
 129  * \brief Add a node to the remote peer cache
 130  *
 131  * \param[in] node_name  Name of remote node
 132  *
 133  * \note This is a legacy convenience wrapper for crm_remote_peer_get()
 134  *       for callers that don't need the cache entry returned.
 135  */
 136 void
 137 crm_remote_peer_cache_add(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
 138 {
 139     CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
 140 }
 141 
 142 void
 143 crm_remote_peer_cache_remove(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
 144 {
 145     if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
 146         crm_trace("removed %s from remote peer cache", node_name);
 147     }
 148 }
 149 
 150 /*!
 151  * \internal
 152  * \brief Return node status based on a CIB status entry
 153  *
 154  * \param[in] node_state  XML of node state
 155  *
 156  * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state,
 157  *         CRM_NODE_MEMBER otherwise
 158  * \note Unlike most boolean XML attributes, this one defaults to true, for
 159  *       backward compatibility with older crmd versions that don't set it.
 160  */
 161 static const char *
 162 remote_state_from_cib(xmlNode *node_state)
     /* [previous][next][first][last][top][bottom][index][help] */
 163 {
 164     const char *status;
 165 
 166     status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
 167     if (status && !crm_is_true(status)) {
 168         status = CRM_NODE_LOST;
 169     } else {
 170         status = CRM_NODE_MEMBER;
 171     }
 172     return status;
 173 }
 174 
 175 /* user data for looping through remote node xpath searches */
 176 struct refresh_data {
 177     const char *field;  /* XML attribute to check for node name */
 178     gboolean has_state; /* whether to update node state based on XML */
 179 };
 180 
 181 /*!
 182  * \internal
 183  * \brief Process one pacemaker_remote node xpath search result
 184  *
 185  * \param[in] result     XML search result
 186  * \param[in] user_data  what to look for in the XML
 187  */
 188 static void
 189 remote_cache_refresh_helper(xmlNode *result, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 190 {
 191     struct refresh_data *data = user_data;
 192     const char *remote = crm_element_value(result, data->field);
 193     const char *state = NULL;
 194     crm_node_t *node;
 195 
 196     CRM_CHECK(remote != NULL, return);
 197 
 198     /* Determine node's state, if the result has it */
 199     if (data->has_state) {
 200         state = remote_state_from_cib(result);
 201     }
 202 
 203     /* Check whether cache already has entry for node */
 204     node = g_hash_table_lookup(crm_remote_peer_cache, remote);
 205 
 206     if (node == NULL) {
 207         /* Node is not in cache, so add a new entry for it */
 208         node = crm_remote_peer_get(remote);
 209         CRM_ASSERT(node);
 210         if (state) {
 211             crm_update_peer_state(__FUNCTION__, node, state, 0);
 212         }
 213 
 214     } else if (is_set(node->flags, crm_node_dirty)) {
 215         /* Node is in cache and hasn't been updated already, so mark it clean */
 216         clear_bit(node->flags, crm_node_dirty);
 217         if (state) {
 218             crm_update_peer_state(__FUNCTION__, node, state, 0);
 219         }
 220     }
 221 }
 222 
 223 static void
 224 mark_dirty(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 225 {
 226     set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
 227 }
 228 
 229 static gboolean
 230 is_dirty(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232     return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
 233 }
 234 
 235 /* search string to find CIB resources entries for guest nodes */
 236 #define XPATH_GUEST_NODE_CONFIG \
 237     "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
 238     "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
 239     "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
 240 
 241 /* search string to find CIB resources entries for remote nodes */
 242 #define XPATH_REMOTE_NODE_CONFIG \
 243     "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
 244     "[@type='remote'][@provider='pacemaker']"
 245 
 246 /* search string to find CIB node status entries for pacemaker_remote nodes */
 247 #define XPATH_REMOTE_NODE_STATUS \
 248     "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
 249     "[@" XML_NODE_IS_REMOTE "='true']"
 250 
 251 /*!
 252  * \brief Repopulate the remote peer cache based on CIB XML
 253  *
 254  * \param[in] xmlNode  CIB XML to parse
 255  */
 256 void
 257 crm_remote_peer_cache_refresh(xmlNode *cib)
     /* [previous][next][first][last][top][bottom][index][help] */
 258 {
 259     struct refresh_data data;
 260 
 261     crm_peer_init();
 262 
 263     /* First, we mark all existing cache entries as dirty,
 264      * so that later we can remove any that weren't in the CIB.
 265      * We don't empty the cache, because we need to detect changes in state.
 266      */
 267     g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
 268 
 269     /* Look for guest nodes and remote nodes in the status section */
 270     data.field = "id";
 271     data.has_state = TRUE;
 272     crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS,
 273                              remote_cache_refresh_helper, &data);
 274 
 275     /* Look for guest nodes and remote nodes in the configuration section,
 276      * because they may have just been added and not have a status entry yet.
 277      * In that case, the cached node state will be left NULL, so that the
 278      * peer status callback isn't called until we're sure the node started
 279      * successfully.
 280      */
 281     data.field = "value";
 282     data.has_state = FALSE;
 283     crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG,
 284                              remote_cache_refresh_helper, &data);
 285     data.field = "id";
 286     data.has_state = FALSE;
 287     crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG,
 288                              remote_cache_refresh_helper, &data);
 289 
 290     /* Remove all old cache entries that weren't seen in the CIB */
 291     g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
 292 }
 293 
 294 gboolean
 295 crm_is_peer_active(const crm_node_t * node)
     /* [previous][next][first][last][top][bottom][index][help] */
 296 {
 297     if(node == NULL) {
 298         return FALSE;
 299     }
 300 
 301     if (is_set(node->flags, crm_remote_node)) {
 302         /* remote nodes are never considered active members. This
 303          * guarantees they will never be considered for DC membership.*/
 304         return FALSE;
 305     }
 306 #if SUPPORT_COROSYNC
 307     if (is_openais_cluster()) {
 308         return crm_is_corosync_peer_active(node);
 309     }
 310 #endif
 311 #if SUPPORT_HEARTBEAT
 312     if (is_heartbeat_cluster()) {
 313         return crm_is_heartbeat_peer_active(node);
 314     }
 315 #endif
 316     crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
 317     return FALSE;
 318 }
 319 
 320 static gboolean
 321 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 322 {
 323     crm_node_t *node = value;
 324     crm_node_t *search = user_data;
 325 
 326     if (search == NULL) {
 327         return FALSE;
 328 
 329     } else if (search->id && node->id != search->id) {
 330         return FALSE;
 331 
 332     } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
 333         return FALSE;
 334 
 335     } else if (crm_is_peer_active(value) == FALSE) {
 336         crm_info("Removing node with name %s and id %u from membership cache",
 337                  (node->uname? node->uname : "unknown"), node->id);
 338         return TRUE;
 339     }
 340     return FALSE;
 341 }
 342 
 343 /*!
 344  * \brief Remove all peer cache entries matching a node ID and/or uname
 345  *
 346  * \param[in] id    ID of node to remove (or 0 to ignore)
 347  * \param[in] name  Uname of node to remove (or NULL to ignore)
 348  *
 349  * \return Number of cache entries removed
 350  */
 351 guint
 352 reap_crm_member(uint32_t id, const char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
 353 {
 354     int matches = 0;
 355     crm_node_t search;
 356 
 357     if (crm_peer_cache == NULL) {
 358         crm_trace("Membership cache not initialized, ignoring purge request");
 359         return 0;
 360     }
 361 
 362     search.id = id;
 363     search.uname = name ? strdup(name) : NULL;
 364     matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
 365     if(matches) {
 366         crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
 367                    matches, s_if_plural(matches), search.id,
 368                    (search.uname? " and/or uname=" : ""),
 369                    (search.uname? search.uname : ""));
 370 
 371     } else {
 372         crm_info("No peers with id=%u%s%s to purge from the membership cache",
 373                  search.id, (search.uname? " and/or uname=" : ""),
 374                  (search.uname? search.uname : ""));
 375     }
 376 
 377     free(search.uname);
 378     return matches;
 379 }
 380 
 381 static void
 382 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 383 {
 384     guint *count = user_data;
 385     crm_node_t *node = value;
 386 
 387     if (crm_is_peer_active(node)) {
 388         *count = *count + 1;
 389     }
 390 }
 391 
 392 guint
 393 crm_active_peers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 394 {
 395     guint count = 0;
 396 
 397     if (crm_peer_cache) {
 398         g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
 399     }
 400     return count;
 401 }
 402 
 403 static void
 404 destroy_crm_node(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 405 {
 406     crm_node_t *node = data;
 407 
 408     crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
 409 
 410     free(node->addr);
 411     free(node->uname);
 412     free(node->state);
 413     free(node->uuid);
 414     free(node->expected);
 415     free(node);
 416 }
 417 
 418 void
 419 crm_peer_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 420 {
 421     if (crm_peer_cache == NULL) {
 422         crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
 423     }
 424 
 425     if (crm_remote_peer_cache == NULL) {
 426         crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
 427     }
 428 }
 429 
 430 void
 431 crm_peer_destroy(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 432 {
 433     if (crm_peer_cache != NULL) {
 434         crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
 435         g_hash_table_destroy(crm_peer_cache);
 436         crm_peer_cache = NULL;
 437     }
 438 
 439     if (crm_remote_peer_cache != NULL) {
 440         crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
 441         g_hash_table_destroy(crm_remote_peer_cache);
 442         crm_remote_peer_cache = NULL;
 443     }
 444 }
 445 
 446 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
 447 
 448 /*!
 449  * \brief Set a client function that will be called after peer status changes
 450  *
 451  * \param[in] dispatch  Pointer to function to use as callback
 452  *
 453  * \note Previously, client callbacks were responsible for peer cache
 454  *       management. This is no longer the case, and client callbacks should do
 455  *       only client-specific handling. Callbacks MUST NOT add or remove entries
 456  *       in the peer caches.
 457  */
 458 void
 459 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
     /* [previous][next][first][last][top][bottom][index][help] */
 460 {
 461     crm_status_callback = dispatch;
 462 }
 463 
 464 /*!
 465  * \brief Tell the library whether to automatically reap lost nodes
 466  *
 467  * If TRUE (the default), calling crm_update_peer_proc() will also update the
 468  * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state()
 469  * will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
 470  * Callers should leave this enabled unless they plan to manage the cache
 471  * separately on their own.
 472  *
 473  * \param[in] autoreap  TRUE to enable automatic reaping, FALSE to disable
 474  */
 475 void
 476 crm_set_autoreap(gboolean autoreap)
     /* [previous][next][first][last][top][bottom][index][help] */
 477 {
 478     crm_autoreap = autoreap;
 479 }
 480 
 481 static void crm_dump_peer_hash(int level, const char *caller)
     /* [previous][next][first][last][top][bottom][index][help] */
 482 {
 483     GHashTableIter iter;
 484     const char *id = NULL;
 485     crm_node_t *node = NULL;
 486 
 487     g_hash_table_iter_init(&iter, crm_peer_cache);
 488     while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
 489         do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
 490     }
 491 }
 492 
 493 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 494 {
 495     if(value == user_data) {
 496         return TRUE;
 497     }
 498     return FALSE;
 499 }
 500 
 501 crm_node_t *
 502 crm_find_peer_full(unsigned int id, const char *uname, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 503 {
 504     crm_node_t *node = NULL;
 505 
 506     CRM_ASSERT(id > 0 || uname != NULL);
 507 
 508     crm_peer_init();
 509 
 510     if (flags & CRM_GET_PEER_REMOTE) {
 511         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
 512     }
 513 
 514     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
 515         node = crm_find_peer(id, uname);
 516     }
 517     return node;
 518 }
 519 
 520 crm_node_t *
 521 crm_get_peer_full(unsigned int id, const char *uname, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 522 {
 523     crm_node_t *node = NULL;
 524 
 525     CRM_ASSERT(id > 0 || uname != NULL);
 526 
 527     crm_peer_init();
 528 
 529     if (flags & CRM_GET_PEER_REMOTE) {
 530         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
 531     }
 532 
 533     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
 534         node = crm_get_peer(id, uname);
 535     }
 536     return node;
 537 }
 538 
 539 crm_node_t *
 540 crm_find_peer(unsigned int id, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 541 {
 542     GHashTableIter iter;
 543     crm_node_t *node = NULL;
 544     crm_node_t *by_id = NULL;
 545     crm_node_t *by_name = NULL;
 546 
 547     CRM_ASSERT(id > 0 || uname != NULL);
 548 
 549     crm_peer_init();
 550 
 551     if (uname != NULL) {
 552         g_hash_table_iter_init(&iter, crm_peer_cache);
 553         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 554             if(node->uname && strcasecmp(node->uname, uname) == 0) {
 555                 crm_trace("Name match: %s = %p", node->uname, node);
 556                 by_name = node;
 557                 break;
 558             }
 559         }
 560     }
 561 
 562     if (id > 0) {
 563         g_hash_table_iter_init(&iter, crm_peer_cache);
 564         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 565             if(node->id == id) {
 566                 crm_trace("ID match: %u = %p", node->id, node);
 567                 by_id = node;
 568                 break;
 569             }
 570         }
 571     }
 572 
 573     node = by_id; /* Good default */
 574     if(by_id == by_name) {
 575         /* Nothing to do if they match (both NULL counts) */
 576         crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
 577 
 578     } else if(by_id == NULL && by_name) {
 579         crm_trace("Only one: %p for %u/%s", by_name, id, uname);
 580 
 581         if(id && by_name->id) {
 582             crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
 583             crm_crit("Node %u and %u share the same name '%s'",
 584                      id, by_name->id, uname);
 585             node = NULL; /* Create a new one */
 586 
 587         } else {
 588             node = by_name;
 589         }
 590 
 591     } else if(by_name == NULL && by_id) {
 592         crm_trace("Only one: %p for %u/%s", by_id, id, uname);
 593 
 594         if(uname && by_id->uname) {
 595             crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
 596             crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
 597                      uname, by_id->uname, id, uname);
 598         }
 599 
 600     } else if(uname && by_id->uname) {
 601         if(safe_str_eq(uname, by_id->uname)) {
 602             crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
 603             g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
 604 
 605         } else {
 606             crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
 607             crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
 608             crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
 609         }
 610 
 611     } else if(id && by_name->id) {
 612         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
 613 
 614     } else {
 615         /* Simple merge */
 616 
 617         /* Only corosync based clusters use nodeid's
 618          *
 619          * The functions that call crm_update_peer_state() only know nodeid
 620          * so 'by_id' is authorative when merging
 621          *
 622          * Same for crm_update_peer_proc()
 623          */
 624         crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
 625 
 626         crm_info("Merging %p into %p", by_name, by_id);
 627         g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
 628     }
 629 
 630     return node;
 631 }
 632 
 633 #if SUPPORT_COROSYNC
 634 static guint
 635 crm_remove_conflicting_peer(crm_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 636 {
 637     int matches = 0;
 638     GHashTableIter iter;
 639     crm_node_t *existing_node = NULL;
 640 
 641     if (node->id == 0 || node->uname == NULL) {
 642         return 0;
 643     }
 644 
 645 #  if !SUPPORT_PLUGIN
 646     if (corosync_cmap_has_config("nodelist") != 0) {
 647         return 0;
 648     }
 649 #  endif
 650 
 651     g_hash_table_iter_init(&iter, crm_peer_cache);
 652     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
 653         if (existing_node->id > 0
 654             && existing_node->id != node->id
 655             && existing_node->uname != NULL
 656             && strcasecmp(existing_node->uname, node->uname) == 0) {
 657 
 658             if (crm_is_peer_active(existing_node)) {
 659                 continue;
 660             }
 661 
 662             crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
 663                      existing_node->id, existing_node->uname, node->id);
 664 
 665             g_hash_table_iter_remove(&iter);
 666             matches++;
 667         }
 668     }
 669 
 670     return matches;
 671 }
 672 #endif
 673 
 674 /* coverity[-alloc] Memory is referenced in one or both hashtables */
 675 crm_node_t *
 676 crm_get_peer(unsigned int id, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 677 {
 678     crm_node_t *node = NULL;
 679     char *uname_lookup = NULL;
 680 
 681     CRM_ASSERT(id > 0 || uname != NULL);
 682 
 683     crm_peer_init();
 684 
 685     node = crm_find_peer(id, uname);
 686 
 687     /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
 688      * we need to do a lookup of the node name using the id in the cluster membership. */
 689     if ((node == NULL || node->uname == NULL) && (uname == NULL)) { 
 690         uname_lookup = get_node_name(id);
 691     }
 692 
 693     if (uname_lookup) {
 694         uname = uname_lookup;
 695         crm_trace("Inferred a name of '%s' for node %u", uname, id);
 696 
 697         /* try to turn up the node one more time now that we know the uname. */
 698         if (node == NULL) {
 699             node = crm_find_peer(id, uname);
 700         }
 701     }
 702 
 703 
 704     if (node == NULL) {
 705         char *uniqueid = crm_generate_uuid();
 706 
 707         node = calloc(1, sizeof(crm_node_t));
 708         CRM_ASSERT(node);
 709 
 710         crm_info("Created entry %s/%p for node %s/%u (%d total)",
 711                  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
 712         g_hash_table_replace(crm_peer_cache, uniqueid, node);
 713     }
 714 
 715     if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
 716         crm_info("Node %u is now known as %s", id, uname);
 717     }
 718 
 719     if(id > 0 && node->id == 0) {
 720         node->id = id;
 721     }
 722 
 723     if (uname && (node->uname == NULL)) {
 724         crm_update_peer_uname(node, uname);
 725     }
 726 
 727     if(node->uuid == NULL) {
 728         const char *uuid = crm_peer_uuid(node);
 729 
 730         if (uuid) {
 731             crm_info("Node %u has uuid %s", id, uuid);
 732 
 733         } else {
 734             crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
 735         }
 736     }
 737 
 738     free(uname_lookup);
 739 
 740     return node;
 741 }
 742 
 743 /*!
 744  * \internal
 745  * \brief Update all of a node's information (process list, state, etc.)
 746  *
 747  * \param[in] source      Caller's function name (for log messages)
 748  *
 749  * \return NULL if node was reaped from peer caches, pointer to node otherwise
 750  *
 751  * \note This function should not be called within a peer cache iteration,
 752  *       otherwise reaping could invalidate the iterator.
 753  */
 754 crm_node_t *
 755 crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
     /* [previous][next][first][last][top][bottom][index][help] */
 756                 uint32_t children, const char *uuid, const char *uname, const char *addr,
 757                 const char *state)
 758 {
 759 #if SUPPORT_PLUGIN
 760     gboolean addr_changed = FALSE;
 761     gboolean votes_changed = FALSE;
 762 #endif
 763     crm_node_t *node = NULL;
 764 
 765     id = get_corosync_id(id, uuid);
 766     node = crm_get_peer(id, uname);
 767 
 768     CRM_ASSERT(node != NULL);
 769 
 770     if (node->uuid == NULL) {
 771         if (is_openais_cluster()) {
 772             /* Yes, overrule whatever was passed in */
 773             crm_peer_uuid(node);
 774 
 775         } else if (uuid != NULL) {
 776             node->uuid = strdup(uuid);
 777         }
 778     }
 779 
 780     if (children > 0) {
 781         if (crm_update_peer_proc(source, node, children, state) == NULL) {
 782             return NULL;
 783         }
 784     }
 785 
 786     if (state != NULL) {
 787         if (crm_update_peer_state(source, node, state, seen) == NULL) {
 788             return NULL;
 789         }
 790     }
 791 #if SUPPORT_HEARTBEAT
 792     if (born != 0) {
 793         node->born = born;
 794     }
 795 #endif
 796 
 797 #if SUPPORT_PLUGIN
 798     /* These were only used by the plugin */
 799     if (born != 0) {
 800         node->born = born;
 801     }
 802 
 803     if (votes > 0 && node->votes != votes) {
 804         votes_changed = TRUE;
 805         node->votes = votes;
 806     }
 807 
 808     if (addr != NULL) {
 809         if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
 810             addr_changed = TRUE;
 811             free(node->addr);
 812             node->addr = strdup(addr);
 813         }
 814     }
 815     if (addr_changed || votes_changed) {
 816         crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
 817                  " proc=%.32x", source, node->uname, node->id, node->state,
 818                  node->addr, addr_changed ? " (new)" : "", node->votes,
 819                  votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
 820     }
 821 #endif
 822 
 823     return node;
 824 }
 825 
 826 /*!
 827  * \internal
 828  * \brief Update a node's uname
 829  *
 830  * \param[in] node        Node object to update
 831  * \param[in] uname       New name to set
 832  *
 833  * \note This function should not be called within a peer cache iteration,
 834  *       because in some cases it can remove conflicting cache entries,
 835  *       which would invalidate the iterator.
 836  */
 837 void
 838 crm_update_peer_uname(crm_node_t *node, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 839 {
 840     CRM_CHECK(uname != NULL,
 841               crm_err("Bug: can't update node name without name"); return);
 842     CRM_CHECK(node != NULL,
 843               crm_err("Bug: can't update node name to %s without node", uname);
 844               return);
 845 
 846     if (safe_str_eq(uname, node->uname)) {
 847         crm_debug("Node uname '%s' did not change", uname);
 848         return;
 849     }
 850 
 851     for (const char *c = uname; *c; ++c) {
 852         if ((*c >= 'A') && (*c <= 'Z')) {
 853             crm_warn("Node names with capitals are discouraged, consider changing '%s'",
 854                      uname);
 855             break;
 856         }
 857     }
 858 
 859     free(node->uname);
 860     node->uname = strdup(uname);
 861     CRM_ASSERT(node->uname != NULL);
 862 
 863     if (crm_status_callback) {
 864         crm_status_callback(crm_status_uname, node, NULL);
 865     }
 866 
 867 #if SUPPORT_COROSYNC
 868     if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
 869         crm_remove_conflicting_peer(node);
 870     }
 871 #endif
 872 }
 873 
 874 /*!
 875  * \internal
 876  * \brief Update a node's process information (and potentially state)
 877  *
 878  * \param[in] source      Caller's function name (for log messages)
 879  * \param[in] node        Node object to update
 880  * \param[in] flag        Bitmask of new process information
 881  * \param[in] status      node status (online, offline, etc.)
 882  *
 883  * \return NULL if any node was reaped from peer caches, value of node otherwise
 884  *
 885  * \note If this function returns NULL, the supplied node object was likely
 886  *       freed and should not be used again. This function should not be
 887  *       called within a cache iteration if reaping is possible, otherwise
 888  *       reaping could invalidate the iterator.
 889  */
 890 crm_node_t *
 891 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
     /* [previous][next][first][last][top][bottom][index][help] */
 892 {
 893     uint32_t last = 0;
 894     gboolean changed = FALSE;
 895 
 896     CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
 897                                     source, peer2text(flag), status); return NULL);
 898 
 899     /* Pacemaker doesn't spawn processes on remote nodes */
 900     if (is_set(node->flags, crm_remote_node)) {
 901         return node;
 902     }
 903 
 904     last = node->processes;
 905     if (status == NULL) {
 906         node->processes = flag;
 907         if (node->processes != last) {
 908             changed = TRUE;
 909         }
 910 
 911     } else if (safe_str_eq(status, ONLINESTATUS)) {
 912         if ((node->processes & flag) != flag) {
 913             set_bit(node->processes, flag);
 914             changed = TRUE;
 915         }
 916 #if SUPPORT_PLUGIN
 917     } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
 918         if (flag > 0 && node->processes != flag) {
 919             node->processes = flag;
 920             changed = TRUE;
 921         }
 922 #endif
 923 
 924     } else if (node->processes & flag) {
 925         clear_bit(node->processes, flag);
 926         changed = TRUE;
 927     }
 928 
 929     if (changed) {
 930         if (status == NULL && flag <= crm_proc_none) {
 931             crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
 932                      node->id);
 933         } else {
 934             crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
 935                      peer2text(flag), status);
 936         }
 937 
 938         /* Call the client callback first, then update the peer state,
 939          * in case the node will be reaped
 940          */
 941         if (crm_status_callback) {
 942             crm_status_callback(crm_status_processes, node, &last);
 943         }
 944 
 945         /* The client callback shouldn't touch the peer caches,
 946          * but as a safety net, bail if the peer cache was destroyed.
 947          */
 948         if (crm_peer_cache == NULL) {
 949             return NULL;
 950         }
 951 
 952         if (crm_autoreap) {
 953             node = crm_update_peer_state(__FUNCTION__, node,
 954                                          is_set(node->processes, crm_get_cluster_proc())?
 955                                          CRM_NODE_MEMBER : CRM_NODE_LOST, 0);
 956         }
 957     } else {
 958         crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
 959                   peer2text(flag), status);
 960     }
 961     return node;
 962 }
 963 
 964 void
 965 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
     /* [previous][next][first][last][top][bottom][index][help] */
 966 {
 967     char *last = NULL;
 968     gboolean changed = FALSE;
 969 
 970     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
 971               return);
 972 
 973     /* Remote nodes don't participate in joins */
 974     if (is_set(node->flags, crm_remote_node)) {
 975         return;
 976     }
 977 
 978     last = node->expected;
 979     if (expected != NULL && safe_str_neq(node->expected, expected)) {
 980         node->expected = strdup(expected);
 981         changed = TRUE;
 982     }
 983 
 984     if (changed) {
 985         crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
 986                  expected, last);
 987         free(last);
 988     } else {
 989         crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
 990                   node->id, expected);
 991     }
 992 }
 993 
 994 /*!
 995  * \internal
 996  * \brief Update a node's state and membership information
 997  *
 998  * \param[in] source      Caller's function name (for log messages)
 999  * \param[in] node        Node object to update
1000  * \param[in] state       Node's new state
1001  * \param[in] membership  Node's new membership ID
1002  * \param[in] iter        If not NULL, pointer to node's peer cache iterator
1003  *
1004  * \return NULL if any node was reaped, value of node otherwise
1005  *
1006  * \note If this function returns NULL, the supplied node object was likely
1007  *       freed and should not be used again. This function may be called from
1008  *       within a peer cache iteration if the iterator is supplied.
1009  */
1010 static crm_node_t *
1011 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
     /* [previous][next][first][last][top][bottom][index][help] */
1012 {
1013     gboolean is_member;
1014 
1015     CRM_CHECK(node != NULL,
1016               crm_err("Could not set state for unknown host to %s"
1017                       CRM_XS " source=%s", state, source);
1018               return NULL);
1019 
1020     is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1021     if (membership && is_member) {
1022         node->last_seen = membership;
1023     }
1024 
1025     if (state && safe_str_neq(node->state, state)) {
1026         char *last = node->state;
1027         enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
1028                                            crm_status_rstate : crm_status_nstate;
1029 
1030         node->state = strdup(state);
1031         crm_notice("Node %s state is now %s " CRM_XS
1032                    " nodeid=%u previous=%s source=%s", node->uname, state,
1033                    node->id, (last? last : "unknown"), source);
1034         if (crm_status_callback) {
1035             crm_status_callback(status_type, node, last);
1036         }
1037         free(last);
1038 
1039         if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1040             /* We only autoreap from the peer cache, not the remote peer cache,
1041              * because the latter should be managed only by
1042              * crm_remote_peer_cache_refresh().
1043              */
1044             if(iter) {
1045                 crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1046                 g_hash_table_iter_remove(iter);
1047 
1048             } else {
1049                 reap_crm_member(node->id, node->uname);
1050             }
1051             node = NULL;
1052         }
1053 
1054     } else {
1055         crm_trace("Node %s state is unchanged (%s) " CRM_XS
1056                   " nodeid=%u source=%s", node->uname, state, node->id, source);
1057     }
1058     return node;
1059 }
1060 
1061 /*!
1062  * \brief Update a node's state and membership information
1063  *
1064  * \param[in] source      Caller's function name (for log messages)
1065  * \param[in] node        Node object to update
1066  * \param[in] state       Node's new state
1067  * \param[in] membership  Node's new membership ID
1068  *
1069  * \return NULL if any node was reaped, value of node otherwise
1070  *
1071  * \note If this function returns NULL, the supplied node object was likely
1072  *       freed and should not be used again. This function should not be
1073  *       called within a cache iteration if reaping is possible,
1074  *       otherwise reaping could invalidate the iterator.
1075  */
1076 crm_node_t *
1077 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
     /* [previous][next][first][last][top][bottom][index][help] */
1078 {
1079     return crm_update_peer_state_iter(source, node, state, membership, NULL);
1080 }
1081 
1082 /*!
1083  * \internal
1084  * \brief Reap all nodes from cache whose membership information does not match
1085  *
1086  * \param[in] membership  Membership ID of nodes to keep
1087  */
1088 void
1089 crm_reap_unseen_nodes(uint64_t membership)
     /* [previous][next][first][last][top][bottom][index][help] */
1090 {
1091     GHashTableIter iter;
1092     crm_node_t *node = NULL;
1093 
1094     crm_trace("Reaping unseen nodes...");
1095     g_hash_table_iter_init(&iter, crm_peer_cache);
1096     while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1097         if (node->last_seen != membership) {
1098             if (node->state) {
1099                 /*
1100                  * Calling crm_update_peer_state_iter() allows us to
1101                  * remove the node from crm_peer_cache without
1102                  * invalidating our iterator
1103                  */
1104                 crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
1105 
1106             } else {
1107                 crm_info("State of node %s[%u] is still unknown",
1108                          node->uname, node->id);
1109             }
1110         }
1111     }
1112 }
1113 
1114 int
1115 crm_terminate_member(int nodeid, const char *uname, void *unused)
     /* [previous][next][first][last][top][bottom][index][help] */
1116 {
1117     /* Always use the synchronous, non-mainloop version */
1118     return stonith_api_kick(nodeid, uname, 120, TRUE);
1119 }
1120 
1121 int
1122 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
     /* [previous][next][first][last][top][bottom][index][help] */
1123 {
1124     return stonith_api_kick(nodeid, uname, 120, TRUE);
1125 }

/* [previous][next][first][last][top][bottom][index][help] */