pacemaker 3.0.1-16e74fc4da
Scalable High-Availability cluster resource manager
membership.c
/*
 * Copyright 2004-2025 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <inttypes.h>   // PRIu32
#include <stdbool.h>    // bool
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <sys/types.h>
#include <unistd.h>

#include <glib.h>

#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"

/* The peer cache remembers cluster nodes that have been seen. This is managed
 * mostly automatically by libcrmcluster, based on cluster membership events.
 *
 * Because cluster nodes can have conflicting names or UUIDs, the hash table key
 * is a uniquely generated ID.
 *
 * @TODO Move caches to pcmk_cluster_t
 */
GHashTable *pcmk__peer_cache = NULL;

/* The remote peer cache tracks pacemaker_remote nodes. While the
 * value has the same type as the peer cache's, it is tracked separately for
 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
 * so the name (which is also the UUID) is used as the hash table key; there
 * is no equivalent of membership events, so management is not automatic; and
 * most users of the peer cache need to exclude pacemaker_remote nodes.
 *
 * @TODO That said, using a single cache would be more logical and less
 * error-prone, so it would be a good idea to merge them one day.
 *
 * libcrmcluster provides two avenues for populating the cache:
 * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
 * directly manage it, while refresh_remote_nodes() populates it via the CIB.
 *
 * @TODO Move caches to pcmk_cluster_t
 */
GHashTable *pcmk__remote_peer_cache = NULL;

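/* Illustrative sketch (not part of the original file): a caller using the
 * direct-management avenue might do something like the following, where
 * "remote-node-1" is a hypothetical Pacemaker Remote node name:
 * \code
 *     pcmk__node_status_t *remote = NULL;
 *
 *     pcmk__cluster_init_node_caches();
 *     remote = pcmk__cluster_lookup_remote_node("remote-node-1");
 * \endcode
 */
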
/*
 * The CIB cluster node cache tracks cluster nodes that have been seen in
 * the CIB. It is useful mainly when a caller needs to know about a node that
 * may no longer be in the membership, but doesn't want to add the node to the
 * main peer cache tables.
 */
static GHashTable *cluster_node_cib_cache = NULL;

static bool autoreap = true;
static bool has_quorum = false;

// Flag setting and clearing for pcmk__node_status_t:flags

#define set_peer_flags(peer, flags_to_set) do {                               \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                           "Peer", (peer)->name,              \
                                           (peer)->flags, (flags_to_set),     \
                                           #flags_to_set);                    \
    } while (0)

#define clear_peer_flags(peer, flags_to_clear) do {                           \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,              \
                                             LOG_TRACE,                       \
                                             "Peer", (peer)->name,            \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)

static void update_peer_uname(pcmk__node_status_t *node, const char *uname);
static pcmk__node_status_t *find_cib_cluster_node(const char *id,
                                                  const char *uname);

/*!
 * \internal
 * \brief Check whether the cluster currently has quorum
 *
 * \return \c true if the cluster has quorum, or \c false otherwise
 */
bool
pcmk__cluster_has_quorum(void)
{
    return has_quorum;
}

/*!
 * \internal
 * \brief Set whether the cluster currently has quorum
 *
 * \param[in] quorate  \c true if the cluster has quorum, or \c false otherwise
 */
void
pcmk__cluster_set_quorum(bool quorate)
{
    has_quorum = quorate;
}

/*!
 * \internal
 * \brief Get the number of Pacemaker Remote nodes that have been seen
 *
 * \return Number of cached Pacemaker Remote nodes
 */
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
    if (pcmk__remote_peer_cache == NULL) {
        return 0U;
    }
    return g_hash_table_size(pcmk__remote_peer_cache);
}

/*!
 * \internal
 * \brief Get a remote node cache entry, creating it if necessary
 *
 * \param[in] node_name  Name of remote node
 *
 * \return Cache entry for node on success, or \c NULL (and set \c errno)
 *         otherwise
 */
pcmk__node_status_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
    pcmk__node_status_t *node = NULL;
    char *node_name_copy = NULL;

    if (node_name == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* It's theoretically possible that the node was added to the cluster peer
     * cache before it was known to be a Pacemaker Remote node. Remove that
     * entry unless it has an XML ID, which means the name actually is
     * associated with a cluster node. (@TODO return an error in that case?)
     */
    node = pcmk__search_node_caches(0, node_name, NULL,
                                    pcmk__node_search_cluster_member);
    if ((node != NULL)
        && ((node->xml_id == NULL)
            /* This assumes only Pacemaker Remote nodes have their XML ID the
             * same as their node name
             */
            || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) {

        /* node_name could be a pointer into the cache entry being removed, so
         * reassign it to a copy before the original gets freed
         */
        node_name_copy = strdup(node_name);
        if (node_name_copy == NULL) {
            errno = ENOMEM;
            return NULL;
        }
        node_name = node_name_copy;
        pcmk__cluster_forget_cluster_node(0, node_name);
    }

    /* Return existing cache entry if one exists */
    node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name);
    if (node) {
        free(node_name_copy);
        return node;
    }

    /* Allocate a new entry */
    node = calloc(1, sizeof(pcmk__node_status_t));
    if (node == NULL) {
        free(node_name_copy);
        return NULL;
    }

    /* Populate the essential information */
    set_peer_flags(node, pcmk__node_status_remote);
    node->xml_id = strdup(node_name);
    if (node->xml_id == NULL) {
        free(node);
        errno = ENOMEM;
        free(node_name_copy);
        return NULL;
    }

    /* Add the new entry to the cache */
    g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node);
    crm_trace("added %s to remote cache", node_name);

    /* Update the entry's uname, ensuring peer status callbacks are called */
    update_peer_uname(node, node_name);
    free(node_name_copy);
    return node;
}

/*!
 * \internal
 * \brief Remove a node from the Pacemaker Remote node cache
 *
 * \param[in] node_name  Name of node to remove from cache
 *
 * \note The caller must be careful not to use \p node_name after calling this
 *       function if it might be a pointer into the cache entry being removed.
 */
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
    /* Do a lookup first, because node_name could be a pointer within the entry
     * being removed -- we can't log it *after* removing it.
     */
    if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) {
        crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
    }
}
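
/* Illustrative sketch: pairing the lookup and forget calls above. "remote1"
 * is a hypothetical node name; pcmk__cluster_lookup_remote_node() sets errno
 * on failure, as shown in its implementation above.
 * \code
 *     pcmk__node_status_t *node = pcmk__cluster_lookup_remote_node("remote1");
 *
 *     if (node == NULL) {
 *         crm_err("Could not cache remote node: %s", strerror(errno));
 *     }
 *     // ... later, once the node should no longer be tracked ...
 *     pcmk__cluster_forget_remote_node("remote1");
 * \endcode
 */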

/*!
 * \internal
 * \brief Return node status based on a CIB status entry
 *
 * \param[in] node_state  XML of node state
 *
 * \return \c PCMK_VALUE_MEMBER if the node is in the cluster membership
 *         according to \p node_state, or \c PCMK__VALUE_LOST otherwise
 */
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
    bool in_ccm = false;

    if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
                                &in_ccm) == pcmk_rc_ok) && in_ccm) {
        return PCMK_VALUE_MEMBER;
    }
    return PCMK__VALUE_LOST;
}

/* user data for looping through remote node xpath searches */
struct refresh_data {
    const char *field;      /* XML attribute to check for node name */
    gboolean has_state;     /* whether to update node state based on XML */
};

/*!
 * \internal
 * \brief Handle one Pacemaker Remote node xpath search result
 *
 * \param[in] result     XML search result
 * \param[in] user_data  What to look for as a struct refresh_data
 */
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
    const struct refresh_data *data = user_data;
    const char *remote = crm_element_value(result, data->field);
    const char *state = NULL;
    pcmk__node_status_t *node = NULL;

    CRM_CHECK(remote != NULL, return);

    /* Determine node's state, if the result has it */
    if (data->has_state) {
        state = remote_state_from_cib(result);
    }

    /* Check whether cache already has entry for node */
    node = g_hash_table_lookup(pcmk__remote_peer_cache, remote);

    if (node == NULL) {
        /* Node is not in cache, so add a new entry for it */
        node = pcmk__cluster_lookup_remote_node(remote);
        pcmk__assert(node != NULL);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }

    } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, pcmk__node_status_dirty);
        if (state) {
            pcmk__update_peer_state(__func__, node, state, 0);
        }
    }
}

static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
    set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty);
}

static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
    const pcmk__node_status_t *node = value;

    return pcmk_is_set(node->flags, pcmk__node_status_dirty);
}

/*!
 * \internal
 * \brief Repopulate the remote node cache based on CIB XML
 *
 * \param[in] cib  CIB XML to parse
 */
static void
refresh_remote_nodes(xmlNode *cib)
{
    struct refresh_data data;

    pcmk__cluster_init_node_caches();

    /* First, we mark all existing cache entries as dirty,
     * so that later we can remove any that weren't in the CIB.
     * We don't empty the cache, because we need to detect changes in state.
     */
    g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);

    /* Look for guest nodes and remote nodes in the status section */
    data.field = PCMK_XA_ID;
    data.has_state = TRUE;
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_STATUS,
                               remote_cache_refresh_helper, &data);

    /* Look for guest nodes and remote nodes in the configuration section,
     * because they may have just been added and not have a status entry yet.
     * In that case, the cached node state will be left NULL, so that the
     * peer status callback isn't called until we're sure the node started
     * successfully.
     */
    data.field = PCMK_XA_VALUE;
    data.has_state = FALSE;
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_GUEST_NODE_CONFIG,
                               remote_cache_refresh_helper, &data);
    data.field = PCMK_XA_ID;
    data.has_state = FALSE;
    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_CONFIG,
                               remote_cache_refresh_helper, &data);

    /* Remove all old cache entries that weren't seen in the CIB */
    g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
}
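
/* The refresh above is a mark-and-sweep pass: every cached entry is first
 * flagged dirty, entries still present in the CIB are un-flagged (and
 * possibly updated) by the helper, and whatever remains dirty is swept away.
 * A minimal sketch of the same pattern using this file's helpers:
 * \code
 *     g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);
 *     // ... re-visit entries that should survive, clearing their dirty flag ...
 *     g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
 * \endcode
 */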

/*!
 * \internal
 * \brief Check whether a node is an active cluster node
 *
 * Remote nodes are never considered active.
 *
 * \param[in] node  Node to check
 *
 * \return \c true if the node is an active cluster node, or \c false otherwise
 */
bool
pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
{
    const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();

    if ((node == NULL) || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return false;
    }

    switch (cluster_layer) {
        case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
            return pcmk__corosync_is_peer_active(node);
#else
            break;
#endif  // SUPPORT_COROSYNC
        default:
            break;
    }

    crm_err("Unhandled cluster layer: %s",
            pcmk_cluster_layer_text(cluster_layer));
    return false;
}

/*!
 * \internal
 * \brief Check whether a node matches a search and can be forgotten
 *
 * This is a \c GHRFunc suitable for \c g_hash_table_foreach_remove(). The
 * search object is itself a \c pcmk__node_status_t: if its cluster layer ID is
 * nonzero, a node's ID must match it; otherwise, the node's name must match.
 * A matching node is forgotten only if it is inactive.
 *
 * \param[in] key        Ignored
 * \param[in] value      Node object from the peer cache
 * \param[in] user_data  Search object to match against
 *
 * \return \c TRUE if the node should be removed from the cache, or \c FALSE
 *         otherwise
 */
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__node_status_t *node = value;
    pcmk__node_status_t *search = user_data;

    if (search == NULL) {
        return FALSE;
    }
    if ((search->cluster_layer_id != 0)
        && (node->cluster_layer_id != search->cluster_layer_id)) {
        return FALSE;
    }
    if ((search->cluster_layer_id == 0)
        && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) {
        // @TODO Consider name even if ID is set?
        return FALSE;
    }
    if (pcmk__cluster_is_node_active(value)) {
        return FALSE;
    }

    crm_info("Removing node with name %s and cluster layer ID %" PRIu32
             " from membership cache",
             pcmk__s(node->name, "(unknown)"), node->cluster_layer_id);
    return TRUE;
}

/*!
 * \internal
 * \brief Remove one or more inactive nodes from the cluster node cache
 *
 * All inactive nodes matching \p id and/or \p node_name as described for
 * \c should_forget_cluster_node() will be removed from the cache.
 *
 * \param[in] id         ID of node to remove (or 0 to ignore)
 * \param[in] node_name  Name of node to remove (or \c NULL to ignore)
 */
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
    pcmk__node_status_t search = { 0, };
    char *criterion = NULL;     // For logging
    guint matches = 0;

    if (pcmk__peer_cache == NULL) {
        crm_trace("Membership cache not initialized, ignoring removal request");
        return;
    }

    search.cluster_layer_id = id;
    search.name = pcmk__str_copy(node_name);    // May log after original freed

    if (id > 0) {
        criterion = crm_strdup_printf("cluster layer ID %" PRIu32, id);

    } else if (node_name != NULL) {
        criterion = crm_strdup_printf("name %s", node_name);
    }

    matches = g_hash_table_foreach_remove(pcmk__peer_cache,
                                          should_forget_cluster_node, &search);
    if (matches > 0) {
        if (criterion != NULL) {
            crm_notice("Removed %u inactive node%s with %s from the membership "
                       "cache",
                       matches, pcmk__plural_s(matches), criterion);
        } else {
            crm_notice("Removed all (%u) inactive cluster nodes from the "
                       "membership cache",
                       matches);
        }

    } else {
        crm_info("No inactive cluster nodes%s%s to remove from the membership "
                 "cache",
                 ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
    }

    free(search.name);
    free(criterion);
}
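
/* Illustrative sketch: inactive nodes can be forgotten by cluster layer ID or
 * by name (the ID and name below are hypothetical):
 * \code
 *     pcmk__cluster_forget_cluster_node(3, NULL);       // match by ID only
 *     pcmk__cluster_forget_cluster_node(0, "node-a");   // match by name only
 * \endcode
 */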

static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
    unsigned int *count = user_data;
    pcmk__node_status_t *node = value;

    if (pcmk__cluster_is_node_active(node)) {
        *count = *count + 1;
    }
}

/*!
 * \internal
 * \brief Get the number of active cluster nodes that have been seen
 *
 * Remote nodes are never considered active.
 *
 * \return Number of active nodes in the cluster node cache
 */
unsigned int
pcmk__cluster_num_active_nodes(void)
{
    unsigned int count = 0;

    if (pcmk__peer_cache != NULL) {
        g_hash_table_foreach(pcmk__peer_cache, count_peer, &count);
    }
    return count;
}

static void
destroy_crm_node(gpointer data)
{
    pcmk__node_status_t *node = data;

    crm_trace("Destroying entry for node %" PRIu32 ": %s",
              node->cluster_layer_id, node->name);

    free(node->name);
    free(node->state);
    free(node->xml_id);
    free(node->user_data);
    free(node->expected);
    free(node->conn_host);
    free(node);
}

/*!
 * \internal
 * \brief Initialize node caches
 */
void
pcmk__cluster_init_node_caches(void)
{
    if (pcmk__peer_cache == NULL) {
        pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node);
    }

    if (pcmk__remote_peer_cache == NULL) {
        pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
    }

    if (cluster_node_cib_cache == NULL) {
        cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
    }
}

/*!
 * \internal
 * \brief Free all node caches
 */
void
pcmk__cluster_destroy_node_caches(void)
{
    if (pcmk__peer_cache != NULL) {
        crm_trace("Destroying peer cache with %d members",
                  g_hash_table_size(pcmk__peer_cache));
        g_hash_table_destroy(pcmk__peer_cache);
        pcmk__peer_cache = NULL;
    }

    if (pcmk__remote_peer_cache != NULL) {
        crm_trace("Destroying remote peer cache with %d members",
                  g_hash_table_size(pcmk__remote_peer_cache));
        g_hash_table_destroy(pcmk__remote_peer_cache);
        pcmk__remote_peer_cache = NULL;
    }

    if (cluster_node_cib_cache != NULL) {
        crm_trace("Destroying configured cluster node cache with %d members",
                  g_hash_table_size(cluster_node_cib_cache));
        g_hash_table_destroy(cluster_node_cib_cache);
        cluster_node_cib_cache = NULL;
    }
}
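
/* Illustrative sketch: the cache lifecycle as a daemon might drive it. Both
 * calls are safe to repeat, since initialization only creates tables that
 * don't already exist:
 * \code
 *     pcmk__cluster_init_node_caches();
 *     // ... use pcmk__get_node(), pcmk__search_node_caches(), etc. ...
 *     pcmk__cluster_destroy_node_caches();
 * \endcode
 */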

static void (*peer_status_callback)(enum pcmk__node_update,
                                    pcmk__node_status_t *,
                                    const void *) = NULL;

/*!
 * \internal
 * \brief Set a client function that will be called after peer status changes
 *
 * \param[in] dispatch  Pointer to function to use as callback
 *
 * \note Client callbacks should do only client-specific handling. Callbacks
 *       must not add or remove entries in the peer caches.
 */
void
pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
                                                   pcmk__node_status_t *,
                                                   const void *))
{
    // @TODO Improve documentation of peer_status_callback
    peer_status_callback = dispatch;
}
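
/* Illustrative sketch: a hypothetical client callback. The third argument's
 * meaning depends on the update type -- for example, update_peer_state_iter()
 * below passes the previous state string with pcmk__node_update_state:
 * \code
 *     static void
 *     my_status_cb(enum pcmk__node_update type, pcmk__node_status_t *node,
 *                  const void *data)
 *     {
 *         crm_info("Node %s updated (update type %d)", node->name, type);
 *     }
 *
 *     pcmk__cluster_set_status_callback(&my_status_cb);
 * \endcode
 */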

/*!
 * \internal
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * If \c true (the default), calling \c crm_update_peer_proc() will also update
 * the peer state to \c PCMK_VALUE_MEMBER or \c PCMK__VALUE_LOST, and updating
 * the peer state will reap peers whose state changes to anything other than
 * \c PCMK_VALUE_MEMBER.
 *
 * Callers should leave this enabled unless they plan to manage the cache
 * separately on their own.
 *
 * \param[in] enable  \c true to enable automatic reaping, \c false to disable
 */
void
pcmk__cluster_set_autoreap(bool enable)
{
    autoreap = enable;
}

static void
dump_peer_hash(int level, const char *caller)
{
    GHashTableIter iter;
    const char *id = NULL;
    pcmk__node_status_t *node = NULL;

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
        do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s",
                   caller, node->cluster_layer_id, node->name, node, id);
    }
}

static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
    return value == user_data;
}

/*!
 * \internal
 * \brief Search cluster member node cache
 *
 * \param[in] id     If not 0, cluster node ID to search for
 * \param[in] uname  If not NULL, node name to search for
 * \param[in] uuid   If not NULL while id is 0, node UUID instead of cluster
 *                   node ID to search for
 *
 * \return Cluster node cache entry if found, otherwise NULL
 */
static pcmk__node_status_t *
search_cluster_member_cache(unsigned int id, const char *uname,
                            const char *uuid)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    if (uname != NULL) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                crm_trace("Name match: %s", node->name);
                by_name = node;
                break;
            }
        }
    }

    if (id > 0) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->cluster_layer_id == id) {
                crm_trace("ID match: %" PRIu32, node->cluster_layer_id);
                by_id = node;
                break;
            }
        }

    } else if (uuid != NULL) {
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            const char *this_xml_id = pcmk__cluster_get_xml_id(node);

            if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
                crm_trace("Found cluster node cache entry by XML ID %s",
                          this_xml_id);
                by_id = node;
                break;
            }
        }
    }

    node = by_id;       /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if ((by_id == NULL) && (by_name != NULL)) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if (id && by_name->cluster_layer_id) {
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Nodes %u and %" PRIu32 " share the same name '%s'",
                     id, by_name->cluster_layer_id, uname);
            node = NULL;        /* Create a new one */

        } else {
            node = by_name;
        }

    } else if ((by_name == NULL) && (by_id != NULL)) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if ((uname != NULL) && (by_id->name != NULL)) {
            dump_peer_hash(LOG_WARNING, __func__);
            crm_crit("Nodes '%s' and '%s' share the same cluster nodeid %u: "
                     "assuming '%s' is correct",
                     uname, by_id->name, id, uname);
        }

    } else if ((uname != NULL) && (by_id->name != NULL)) {
        if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
            crm_notice("Node '%s' has changed its cluster layer ID "
                       "from %" PRIu32 " to %" PRIu32,
                       by_id->name, by_name->cluster_layer_id,
                       by_id->cluster_layer_id);
            g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
                                        by_name);

        } else {
            crm_warn("Nodes '%s' and '%s' share the same cluster nodeid: %u %s",
                     by_id->name, by_name->name, id, uname);
            dump_peer_hash(LOG_INFO, __func__);
            crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
                      TRUE);
        }

    } else if ((id > 0) && (by_name->cluster_layer_id > 0)) {
        crm_warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: '%s'",
                 by_id->cluster_layer_id, by_name->cluster_layer_id, uname);

    } else {
        /* Simple merge */

        /* Only corosync-based clusters use node IDs. The functions that call
         * pcmk__update_peer_state() and crm_update_peer_proc() only know
         * nodeid, so 'by_id' is authoritative when merging.
         */
        dump_peer_hash(LOG_DEBUG, __func__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
                                    by_name);
    }

    return node;
}

/*!
 * \internal
 * \brief Search caches for a node (cluster or Pacemaker Remote)
 *
 * \param[in] id      If not 0, cluster node ID to search for
 * \param[in] uname   If not NULL, node name to search for
 * \param[in] xml_id  If not NULL, CIB XML ID of node to search for
 * \param[in] flags   Group of enum pcmk__node_search_flags
 *
 * \return Node cache entry if found, otherwise NULL
 */
pcmk__node_status_t *
pcmk__search_node_caches(unsigned int id, const char *uname,
                         const char *xml_id, uint32_t flags)
{
    pcmk__node_status_t *node = NULL;

    pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL));

    pcmk__cluster_init_node_caches();

    if (pcmk_is_set(flags, pcmk__node_search_remote)) {
        if (uname != NULL) {
            node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
        } else if (xml_id != NULL) {
            node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id);
        }
    }

    if ((node == NULL)
        && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {

        node = search_cluster_member_cache(id, uname, xml_id);
    }

    if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
        if (xml_id != NULL) {
            node = find_cib_cluster_node(xml_id, uname);
        } else {
            // Assumes XML ID is node ID as string (as with Corosync)
            char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);

            node = find_cib_cluster_node(id_str, uname);
            free(id_str);
        }
    }

    return node;
}
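
/* Illustrative sketch: the search flags combine, so a single call can try the
 * remote cache, the membership cache, and the CIB-based cache in that order
 * ("node-a" is a hypothetical node name):
 * \code
 *     pcmk__node_status_t *found =
 *         pcmk__search_node_caches(0, "node-a", NULL,
 *                                  pcmk__node_search_remote
 *                                  |pcmk__node_search_cluster_member
 *                                  |pcmk__node_search_cluster_cib);
 * \endcode
 */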

/*!
 * \internal
 * \brief Purge a node from cache (both cluster and Pacemaker Remote)
 *
 * \param[in] node_name  If not \c NULL, purge only nodes with this name
 * \param[in] node_id    If not 0, purge cluster nodes only if they have this ID
 *
 * \note If \p node_name is \c NULL and \p node_id is 0, no nodes will be
 *       purged. If \p node_name is not \c NULL and \p node_id is not 0,
 *       Pacemaker Remote nodes that match \p node_name will be purged, and
 *       cluster nodes that match both \p node_name and \p node_id will be
 *       purged.
 */
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
    char *node_name_copy = NULL;

    if ((node_name == NULL) && (node_id == 0U)) {
        return;
    }

    // Purge from Pacemaker Remote node cache
    if ((node_name != NULL)
        && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) {
        /* node_name could be a pointer into the cache entry being purged,
         * so reassign it to a copy before the original gets freed
         */
        node_name_copy = pcmk__str_copy(node_name);
        node_name = node_name_copy;

        crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
        g_hash_table_remove(pcmk__remote_peer_cache, node_name);
    }

    pcmk__cluster_forget_cluster_node(node_id, node_name);
    free(node_name_copy);
}

#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(pcmk__node_status_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    pcmk__node_status_t *existing_node = NULL;

    if ((node->cluster_layer_id == 0) || (node->name == NULL)) {
        return 0;
    }

    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if ((existing_node->cluster_layer_id > 0)
            && (existing_node->cluster_layer_id != node->cluster_layer_id)
            && pcmk__str_eq(existing_node->name, node->name, pcmk__str_casei)) {

            if (pcmk__cluster_is_node_active(existing_node)) {
                continue;
            }

            crm_warn("Removing cached offline node %" PRIu32 "/%s which has "
                     "conflicting name with %" PRIu32,
                     existing_node->cluster_layer_id, existing_node->name,
                     node->cluster_layer_id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif

/*!
 * \internal
 * \brief Get a cluster node cache entry, possibly creating one if not found
 *
 * If \c pcmk__node_search_cluster_member is set in \p flags, a new cache entry
 * is created and returned if none is found.
 *
 * \param[in] id      If not 0, cluster node ID to search for
 * \param[in] uname   If not NULL, node name to search for
 * \param[in] xml_id  If not NULL while id is 0, search for this CIB XML ID
 *                    instead of a cluster ID
 * \param[in] flags   Group of enum pcmk__node_search_flags
 *
 * \return (Possibly newly created) node cache entry
 */
/* coverity[-alloc] Memory is referenced in one or both hashtables */
pcmk__node_status_t *
pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
               uint32_t flags)
{
    pcmk__node_status_t *node = NULL;
    char *uname_lookup = NULL;

    pcmk__assert((id > 0) || (uname != NULL));

    pcmk__cluster_init_node_caches();

    // Check the Pacemaker Remote node cache first
    if (pcmk_is_set(flags, pcmk__node_search_remote)) {
        node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
        if (node != NULL) {
            return node;
        }
    }

    if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
        return NULL;
    }

    node = search_cluster_member_cache(id, uname, xml_id);

    /* If uname wasn't provided, and the cache search didn't turn up a name
     * based on id, look up the node name using the id in the cluster
     * membership.
     */
    if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) {
        uname_lookup = pcmk__cluster_node_name(id);
    }

    if (uname_lookup) {
        uname = uname_lookup;
        crm_trace("Inferred a name of '%s' for node %u", uname, id);

        /* Try to find the node one more time now that we know its name */
        if (node == NULL) {
            node = search_cluster_member_cache(id, uname, xml_id);
        }
    }

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id,
                 1 + g_hash_table_size(pcmk__peer_cache));
        g_hash_table_replace(pcmk__peer_cache, uniqueid, node);
    }

    if ((id > 0) && (uname != NULL)
        && ((node->cluster_layer_id == 0) || (node->name == NULL))) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    if ((id > 0) && (node->cluster_layer_id == 0)) {
        node->cluster_layer_id = id;
    }

    if ((uname != NULL) && (node->name == NULL)) {
        update_peer_uname(node, uname);
    }

    if ((xml_id == NULL) && (node->xml_id == NULL)) {
        xml_id = pcmk__cluster_get_xml_id(node);
        if (xml_id == NULL) {
            crm_debug("Cannot obtain an XML ID for node %s[%u] at this time",
                      node->name, id);
        } else {
            crm_info("Node %s[%u] has XML ID %s", node->name, id, xml_id);
        }
    }

    free(uname_lookup);

    return node;
}
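
/* Illustrative sketch: membership event handlers typically look up (and, if
 * necessary, create) an entry from a cluster layer ID alone, letting the name
 * be inferred from the membership ("nodeid" is a hypothetical uint32_t from
 * the cluster layer):
 * \code
 *     pcmk__node_status_t *peer =
 *         pcmk__get_node(nodeid, NULL, NULL, pcmk__node_search_cluster_member);
 * \endcode
 */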

/*!
 * \internal
 * \brief Update a node's name in the peer cache
 *
 * \param[in,out] node   Node object to update
 * \param[in]     uname  New name to set
 *
 * \note This function should not be called within a peer cache iteration,
 *       because in some cases it can remove conflicting cache entries, which
 *       would invalidate the iterator.
 */
static void
update_peer_uname(pcmk__node_status_t *node, const char *uname)
{
    CRM_CHECK(uname != NULL,
              crm_err("Bug: can't update node name without name"); return);
    CRM_CHECK(node != NULL,
              crm_err("Bug: can't update node name to %s without node", uname);
              return);

    if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) {
        crm_debug("Node name '%s' did not change", uname);
        return;
    }

    for (const char *c = uname; *c; ++c) {
        if ((*c >= 'A') && (*c <= 'Z')) {
            crm_warn("Node names with capitals are discouraged, consider changing '%s'",
                     uname);
            break;
        }
    }

    pcmk__str_update(&node->name, uname);

    if (peer_status_callback != NULL) {
        peer_status_callback(pcmk__node_update_name, node, NULL);
    }

#if SUPPORT_COROSYNC
    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
        remove_conflicting_peer(node);
    }
#endif
}

/*!
 * \internal
 * \brief Get log-friendly string equivalent of a process flag
 *
 * \param[in] proc  Process flag
 *
 * \return Log-friendly string equivalent of \p proc
 */
static inline const char *
proc2text(enum crm_proc_flag proc)
{
    const char *text = "unknown";

    switch (proc) {
        case crm_proc_none:
            text = "none";
            break;
        case crm_proc_cpg:
            text = "corosync-cpg";
            break;
    }
    return text;
}

/*!
 * \internal
 * \brief Update a node's process information (and potentially state)
 *
 * \param[in]     source  Caller's function name (for log messages)
 * \param[in,out] node    Node object to update
 * \param[in]     flag    Bitmask of new process information
 * \param[in]     status  Node status (online, offline, etc.)
 *
 * \return NULL if any node was reaped from peer caches, value of node
 *         otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible, otherwise
 *       reaping could invalidate the iterator.
 */
pcmk__node_status_t *
crm_update_peer_proc(const char *source, pcmk__node_status_t *node,
                     uint32_t flag, const char *status)
{
    uint32_t last = 0;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                    source, proc2text(flag), status);
              return NULL);

    /* Pacemaker doesn't spawn processes on remote nodes */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return node;
    }

    last = node->processes;
    if (status == NULL) {
        node->processes = flag;
        if (node->processes != last) {
            changed = TRUE;
        }

    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
        if ((node->processes & flag) != flag) {
            node->processes = pcmk__set_flags_as(__func__, __LINE__,
                                                 LOG_TRACE, "Peer process",
                                                 node->name, node->processes,
                                                 flag, "processes");
            changed = TRUE;
        }

    } else if (node->processes & flag) {
        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
                                               LOG_TRACE, "Peer process",
                                               node->name, node->processes,
                                               flag, "processes");
        changed = TRUE;
    }

    if (changed) {
        if ((status == NULL) && (flag <= crm_proc_none)) {
            crm_info("%s: Node %s[%" PRIu32 "] - all processes are now offline",
                     source, node->name, node->cluster_layer_id);
        } else {
            crm_info("%s: Node %s[%" PRIu32 "] - %s is now %s",
                     source, node->name, node->cluster_layer_id,
                     proc2text(flag), status);
        }

        if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
            node->when_online = time(NULL);

        } else {
            node->when_online = 0;
        }

        /* Call the client callback first, then update the peer state,
         * in case the node will be reaped
         */
        if (peer_status_callback != NULL) {
            peer_status_callback(pcmk__node_update_processes, node, &last);
        }

        /* The client callback shouldn't touch the peer caches,
         * but as a safety net, bail if the peer cache was destroyed.
         */
        if (pcmk__peer_cache == NULL) {
            return NULL;
        }

        if (autoreap) {
            const char *peer_state = NULL;

            if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
                peer_state = PCMK_VALUE_MEMBER;
            } else {
                peer_state = PCMK__VALUE_LOST;
            }
            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
        }
    } else {
        crm_trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)",
                  source, node->name, node->cluster_layer_id, proc2text(flag),
                  status);
    }
    return node;
}
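
/* Illustrative sketch: a CPG membership handler might record that a peer's
 * cluster process came online; with autoreap enabled, this also updates the
 * peer's state to member ("peer" is a hypothetical cache entry):
 * \code
 *     peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg,
 *                                 PCMK_VALUE_ONLINE);
 * \endcode
 */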

/*!
 * \internal
 * \brief Update a cluster node cache entry's expected join state
 *
 * \param[in]     source    Caller's function name (for logging)
 * \param[in,out] node      Node to update
 * \param[in]     expected  Node's new join state
 */
void
pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
                           const char *expected)
{
    char *last = NULL;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s",
                                    source, expected);
              return);

    /* Remote nodes don't participate in joins */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return;
    }

    last = node->expected;
    if ((expected != NULL)
        && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
        node->expected = strdup(expected);
        changed = TRUE;
    }

    if (changed) {
        crm_info("%s: Node %s[%" PRIu32 "] - expected state is now %s (was %s)",
                 source, node->name, node->cluster_layer_id, expected, last);
        free(last);
    } else {
        crm_trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)",
                  source, node->name, node->cluster_layer_id, expected);
    }
}

/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 * \param[in,out] iter        If not NULL, pointer to node's peer cache iterator
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function may be called from
 *       within a peer cache iteration if the iterator is supplied.
 */
static pcmk__node_status_t *
update_peer_state_iter(const char *source, pcmk__node_status_t *node,
                       const char *state, uint64_t membership,
                       GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s "
                      QB_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none);
    if (is_member) {
        node->when_lost = 0;
        if (membership) {
            node->membership_id = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        if (is_member) {
            node->when_member = time(NULL);

        } else {
            node->when_member = 0;
        }

        node->state = strdup(state);
        crm_notice("Node %s state is now %s " QB_XS
                   " nodeid=%" PRIu32 " previous=%s source=%s",
                   node->name, state, node->cluster_layer_id,
                   pcmk__s(last, "unknown"), source);
        if (peer_status_callback != NULL) {
            peer_status_callback(pcmk__node_update_state, node, last);
        }
        free(last);

        if (autoreap && !is_member
            && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * refresh_remote_nodes().
             */
            if (iter) {
                crm_notice("Purged 1 peer with cluster layer ID %" PRIu32
                           " and/or name=%s from the membership cache",
                           node->cluster_layer_id, node->name);
                g_hash_table_iter_remove(iter);

            } else {
                pcmk__cluster_forget_cluster_node(node->cluster_layer_id,
                                                  node->name);
            }
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " QB_XS
                  " nodeid=%" PRIu32 " source=%s",
                  node->name, state, node->cluster_layer_id, source);
    }
    return node;
}

/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible, otherwise
 *       reaping could invalidate the iterator.
 */
pcmk__node_status_t *
pcmk__update_peer_state(const char *source, pcmk__node_status_t *node,
                        const char *state, uint64_t membership)
{
    return update_peer_state_iter(source, node, state, membership, NULL);
}
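
/* Illustrative sketch: after a membership event, a caller might mark a seen
 * node as a member under the current ring ID ("ring_id" is a hypothetical
 * uint64_t from the cluster layer):
 * \code
 *     node = pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER,
 *                                    ring_id);
 * \endcode
 */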

/*!
 * \internal
 * \brief Reap all nodes from cache whose membership information does not match
 *
 * \param[in] membership  Membership ID of nodes to keep
 */
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
        if (node->membership_id != membership) {
            if (node->state) {
                /* Calling update_peer_state_iter() allows us to remove the
                 * node from pcmk__peer_cache without invalidating our iterator
                 */
                update_peer_state_iter(__func__, node, PCMK__VALUE_LOST,
                                       membership, &iter);

            } else {
                crm_info("State of node %s[%" PRIu32 "] is still unknown",
                         node->name, node->cluster_layer_id);
            }
        }
    }
}

static pcmk__node_status_t *
find_cib_cluster_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    pcmk__node_status_t *by_id = NULL;
    pcmk__node_status_t *by_name = NULL;

    if (uname) {
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
                crm_trace("Name match: %s = %p", node->name, node);
                by_name = node;
                break;
            }
        }
    }

    if (id) {
        g_hash_table_iter_init(&iter, cluster_node_cib_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
                             pcmk__str_none)) {
                crm_trace("ID match: %s = %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id;       /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if ((by_id == NULL) && (by_name != NULL)) {
        crm_trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            node = NULL;

        } else {
            node = by_name;
        }

    } else if ((by_name == NULL) && (by_id != NULL)) {
        crm_trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            node = NULL;
        }

    } else if ((uname != NULL) && (by_id->name != NULL)
               && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB.
         * Return by_id. */

    } else if ((id != NULL) && (by_name->xml_id != NULL)
               && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) {
        /* Multiple nodes have the same id in the CIB.
         * Return by_name. */
        node = by_name;

    } else {
        node = NULL;
    }

    if (node == NULL) {
        crm_debug("Couldn't find node%s%s%s%s",
                  id? " " : "",
                  id? id : "",
                  uname? " with name " : "",
                  uname? uname : "");
    }

    return node;
}

static void
cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
    const char *id = crm_element_value(xml_node, PCMK_XA_ID);
    const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
    pcmk__node_status_t *node = NULL;

    CRM_CHECK((id != NULL) && (uname != NULL), return);
    node = find_cib_cluster_node(id, uname);

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));

        node->name = pcmk__str_copy(uname);
        node->xml_id = pcmk__str_copy(id);

        g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);

    } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
        pcmk__str_update(&node->name, uname);

        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, pcmk__node_status_dirty);
    }
}

static void
refresh_cluster_node_cib_cache(xmlNode *cib)
{
    pcmk__cluster_init_node_caches();

    g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);

    pcmk__xpath_foreach_result(cib->doc, PCMK__XP_MEMBER_NODE_CONFIG,
                               cluster_node_cib_cache_refresh_helper, NULL);

    // Remove all old cache entries that weren't seen in the CIB
    g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}

void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
    refresh_remote_nodes(cib);
    refresh_cluster_node_cib_cache(cib);
}
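
/* Illustrative sketch: daemons that track CIB changes can refresh both
 * CIB-derived caches whenever a new copy arrives ("cib" being the current
 * CIB XML):
 * \code
 *     pcmk__refresh_node_caches_from_cib(cib);
 * \endcode
 */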

// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START

#include <crm/cluster/compat.h>

void
crm_peer_init(void)
{
    pcmk__cluster_init_node_caches();
}

// LCOV_EXCL_STOP
// End deprecated API