pacemaker  2.1.7-0f7f88312f
Scalable High-Availability cluster resource manager
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #ifndef _GNU_SOURCE
13 # define _GNU_SOURCE
14 #endif
15 
16 #include <sys/param.h>
17 #include <sys/types.h>
18 #include <stdio.h>
19 #include <unistd.h>
20 #include <string.h>
21 #include <glib.h>
22 #include <crm/common/ipc.h>
24 #include <crm/cluster/internal.h>
25 #include <crm/msg_xml.h>
26 #include <crm/stonith-ng.h>
27 #include "crmcluster_private.h"
28 
29 /* The peer cache remembers cluster nodes that have been seen.
30  * This is managed mostly automatically by libcluster, based on
31  * cluster membership events.
32  *
33  * Because cluster nodes can have conflicting names or UUIDs,
34  * the hash table key is a uniquely generated ID.
35  */
36 GHashTable *crm_peer_cache = NULL;
37 
38 /*
39  * The remote peer cache tracks pacemaker_remote nodes. While the
40  * value has the same type as the peer cache's, it is tracked separately for
41  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
42  * so the name (which is also the UUID) is used as the hash table key; there
43  * is no equivalent of membership events, so management is not automatic; and
44  * most users of the peer cache need to exclude pacemaker_remote nodes.
45  *
46  * That said, using a single cache would be more logical and less error-prone,
47  * so it would be a good idea to merge them one day.
48  *
49  * libcluster provides two avenues for populating the cache:
50  * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
51  * while crm_remote_peer_cache_refresh() populates it via the CIB.
52  */
53 GHashTable *crm_remote_peer_cache = NULL;
54 
55 /*
56  * The known node cache tracks cluster and remote nodes that have been seen in
57  * the CIB. It is useful mainly when a caller needs to know about a node that
58  * may no longer be in the membership, but doesn't want to add the node to the
59  * main peer cache tables.
60  */
61 static GHashTable *known_node_cache = NULL;
62 
63 unsigned long long crm_peer_seq = 0;
64 gboolean crm_have_quorum = FALSE;
65 static gboolean crm_autoreap = TRUE;
66 
67 // Flag setting and clearing for crm_node_t:flags
68 
/* Turn on the given bits in a peer's flags member.
 *
 * The update goes through pcmk__set_flags_as(), which also receives the
 * calling function and line, LOG_TRACE, the label "Peer", the peer's name,
 * and the stringified flag expression (presumably so the change can be
 * trace-logged).
 */
#define set_peer_flags(node, bits) do {                                     \
        (node)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                           "Peer", (node)->uname,           \
                                           (node)->flags, (bits),           \
                                           #bits);                          \
    } while (0)
75 
/* Turn off the given bits in a peer's flags member.
 *
 * The update goes through pcmk__clear_flags_as(), which also receives the
 * calling function and line, LOG_TRACE, the label "Peer", the peer's name,
 * and the stringified flag expression (presumably so the change can be
 * trace-logged).
 */
#define clear_peer_flags(node, bits) do {                                   \
        (node)->flags = pcmk__clear_flags_as(__func__, __LINE__,            \
                                             LOG_TRACE,                     \
                                             "Peer", (node)->uname,         \
                                             (node)->flags, (bits),         \
                                             #bits);                        \
    } while (0)
83 
84 static void update_peer_uname(crm_node_t *node, const char *uname);
85 
86 int
88 {
89  if (crm_remote_peer_cache == NULL) {
90  return 0;
91  }
92  return g_hash_table_size(crm_remote_peer_cache);
93 }
94 
106 crm_node_t *
107 crm_remote_peer_get(const char *node_name)
108 {
109  crm_node_t *node;
110 
111  if (node_name == NULL) {
112  errno = -EINVAL;
113  return NULL;
114  }
115 
116  /* Return existing cache entry if one exists */
117  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
118  if (node) {
119  return node;
120  }
121 
122  /* Allocate a new entry */
123  node = calloc(1, sizeof(crm_node_t));
124  if (node == NULL) {
125  return NULL;
126  }
127 
128  /* Populate the essential information */
130  node->uuid = strdup(node_name);
131  if (node->uuid == NULL) {
132  free(node);
133  errno = -ENOMEM;
134  return NULL;
135  }
136 
137  /* Add the new entry to the cache */
138  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
139  crm_trace("added %s to remote cache", node_name);
140 
141  /* Update the entry's uname, ensuring peer status callbacks are called */
142  update_peer_uname(node, node_name);
143  return node;
144 }
145 
146 void
147 crm_remote_peer_cache_remove(const char *node_name)
148 {
149  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
150  crm_trace("removed %s from remote peer cache", node_name);
151  }
152 }
153 
165 static const char *
166 remote_state_from_cib(const xmlNode *node_state)
167 {
168  bool status = false;
169 
170  if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
171  &status) == pcmk_rc_ok) && !status) {
172  return CRM_NODE_LOST;
173  } else {
174  return CRM_NODE_MEMBER;
175  }
176 }
177 
178 /* user data for looping through remote node xpath searches */
179 struct refresh_data {
180  const char *field; /* XML attribute to check for node name */
181  gboolean has_state; /* whether to update node state based on XML */
182 };
183 
191 static void
192 remote_cache_refresh_helper(xmlNode *result, void *user_data)
193 {
194  const struct refresh_data *data = user_data;
195  const char *remote = crm_element_value(result, data->field);
196  const char *state = NULL;
197  crm_node_t *node;
198 
199  CRM_CHECK(remote != NULL, return);
200 
201  /* Determine node's state, if the result has it */
202  if (data->has_state) {
203  state = remote_state_from_cib(result);
204  }
205 
206  /* Check whether cache already has entry for node */
207  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
208 
209  if (node == NULL) {
210  /* Node is not in cache, so add a new entry for it */
211  node = crm_remote_peer_get(remote);
212  CRM_ASSERT(node);
213  if (state) {
214  pcmk__update_peer_state(__func__, node, state, 0);
215  }
216 
217  } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
218  /* Node is in cache and hasn't been updated already, so mark it clean */
220  if (state) {
221  pcmk__update_peer_state(__func__, node, state, 0);
222  }
223  }
224 }
225 
226 static void
227 mark_dirty(gpointer key, gpointer value, gpointer user_data)
228 {
230 }
231 
232 static gboolean
233 is_dirty(gpointer key, gpointer value, gpointer user_data)
234 {
235  return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
236 }
237 
243 void
245 {
246  struct refresh_data data;
247 
248  crm_peer_init();
249 
250  /* First, we mark all existing cache entries as dirty,
251  * so that later we can remove any that weren't in the CIB.
252  * We don't empty the cache, because we need to detect changes in state.
253  */
254  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
255 
256  /* Look for guest nodes and remote nodes in the status section */
257  data.field = "id";
258  data.has_state = TRUE;
260  remote_cache_refresh_helper, &data);
261 
262  /* Look for guest nodes and remote nodes in the configuration section,
263  * because they may have just been added and not have a status entry yet.
264  * In that case, the cached node state will be left NULL, so that the
265  * peer status callback isn't called until we're sure the node started
266  * successfully.
267  */
268  data.field = "value";
269  data.has_state = FALSE;
271  remote_cache_refresh_helper, &data);
272  data.field = "id";
273  data.has_state = FALSE;
275  remote_cache_refresh_helper, &data);
276 
277  /* Remove all old cache entries that weren't seen in the CIB */
278  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
279 }
280 
281 gboolean
283 {
284  if(node == NULL) {
285  return FALSE;
286  }
287 
288  if (pcmk_is_set(node->flags, crm_remote_node)) {
289  /* remote nodes are never considered active members. This
290  * guarantees they will never be considered for DC membership.*/
291  return FALSE;
292  }
293 #if SUPPORT_COROSYNC
294  if (is_corosync_cluster()) {
295  return crm_is_corosync_peer_active(node);
296  }
297 #endif
298  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
299  return FALSE;
300 }
301 
302 static gboolean
303 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
304 {
305  crm_node_t *node = value;
306  crm_node_t *search = user_data;
307 
308  if (search == NULL) {
309  return FALSE;
310 
311  } else if (search->id && node->id != search->id) {
312  return FALSE;
313 
314  } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
315  return FALSE;
316 
317  } else if (crm_is_peer_active(value) == FALSE) {
318  crm_info("Removing node with name %s and id %u from membership cache",
319  (node->uname? node->uname : "unknown"), node->id);
320  return TRUE;
321  }
322  return FALSE;
323 }
324 
333 guint
334 reap_crm_member(uint32_t id, const char *name)
335 {
336  int matches = 0;
337  crm_node_t search = { 0, };
338 
339  if (crm_peer_cache == NULL) {
340  crm_trace("Membership cache not initialized, ignoring purge request");
341  return 0;
342  }
343 
344  search.id = id;
345  pcmk__str_update(&search.uname, name);
346  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
347  if(matches) {
348  crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
349  matches, pcmk__plural_s(matches), search.id,
350  (search.uname? " and/or uname=" : ""),
351  (search.uname? search.uname : ""));
352 
353  } else {
354  crm_info("No peers with id=%u%s%s to purge from the membership cache",
355  search.id, (search.uname? " and/or uname=" : ""),
356  (search.uname? search.uname : ""));
357  }
358 
359  free(search.uname);
360  return matches;
361 }
362 
363 static void
364 count_peer(gpointer key, gpointer value, gpointer user_data)
365 {
366  guint *count = user_data;
367  crm_node_t *node = value;
368 
369  if (crm_is_peer_active(node)) {
370  *count = *count + 1;
371  }
372 }
373 
374 guint
376 {
377  guint count = 0;
378 
379  if (crm_peer_cache) {
380  g_hash_table_foreach(crm_peer_cache, count_peer, &count);
381  }
382  return count;
383 }
384 
385 static void
386 destroy_crm_node(gpointer data)
387 {
388  crm_node_t *node = data;
389 
390  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
391 
392  free(node->uname);
393  free(node->state);
394  free(node->uuid);
395  free(node->expected);
396  free(node->conn_host);
397  free(node);
398 }
399 
400 void
402 {
403  if (crm_peer_cache == NULL) {
404  crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
405  }
406 
407  if (crm_remote_peer_cache == NULL) {
408  crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
409  }
410 
411  if (known_node_cache == NULL) {
412  known_node_cache = pcmk__strikey_table(free, destroy_crm_node);
413  }
414 }
415 
416 void
418 {
419  if (crm_peer_cache != NULL) {
420  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
421  g_hash_table_destroy(crm_peer_cache);
422  crm_peer_cache = NULL;
423  }
424 
425  if (crm_remote_peer_cache != NULL) {
426  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
427  g_hash_table_destroy(crm_remote_peer_cache);
428  crm_remote_peer_cache = NULL;
429  }
430 
431  if (known_node_cache != NULL) {
432  crm_trace("Destroying known node cache with %d members",
433  g_hash_table_size(known_node_cache));
434  g_hash_table_destroy(known_node_cache);
435  known_node_cache = NULL;
436  }
437 
438 }
439 
440 static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
441  const void *) = NULL;
442 
453 void
454 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
455 {
456  peer_status_callback = dispatch;
457 }
458 
470 void
471 crm_set_autoreap(gboolean autoreap)
472 {
473  crm_autoreap = autoreap;
474 }
475 
476 static void
477 dump_peer_hash(int level, const char *caller)
478 {
479  GHashTableIter iter;
480  const char *id = NULL;
481  crm_node_t *node = NULL;
482 
483  g_hash_table_iter_init(&iter, crm_peer_cache);
484  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
485  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
486  }
487 }
488 
489 static gboolean
490 hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
491 {
492  return value == user_data;
493 }
494 
505 crm_node_t *
506 pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
507 {
508  crm_node_t *node = NULL;
509 
510  CRM_ASSERT(id > 0 || uname != NULL);
511 
512  crm_peer_init();
513 
514  if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) {
515  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
516  }
517 
518  if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
519  node = pcmk__search_cluster_node_cache(id, uname, NULL);
520  }
521  return node;
522 }
523 
535 crm_node_t *
536 pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid,
537  int flags)
538 {
539  crm_node_t *node = NULL;
540 
541  CRM_ASSERT(id > 0 || uname != NULL);
542 
543  crm_peer_init();
544 
546  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
547  }
548 
549  if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
550  node = pcmk__get_peer(id, uname, uuid);
551  }
552  return node;
553 }
554 
564 crm_node_t *
565 crm_get_peer_full(unsigned int id, const char *uname, int flags)
566 {
567  return pcmk__get_peer_full(id, uname, NULL, flags);
568 }
569 
581 crm_node_t *
582 pcmk__search_cluster_node_cache(unsigned int id, const char *uname,
583  const char *uuid)
584 {
585  GHashTableIter iter;
586  crm_node_t *node = NULL;
587  crm_node_t *by_id = NULL;
588  crm_node_t *by_name = NULL;
589 
590  CRM_ASSERT(id > 0 || uname != NULL);
591 
592  crm_peer_init();
593 
594  if (uname != NULL) {
595  g_hash_table_iter_init(&iter, crm_peer_cache);
596  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
597  if(node->uname && strcasecmp(node->uname, uname) == 0) {
598  crm_trace("Name match: %s = %p", node->uname, node);
599  by_name = node;
600  break;
601  }
602  }
603  }
604 
605  if (id > 0) {
606  g_hash_table_iter_init(&iter, crm_peer_cache);
607  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
608  if(node->id == id) {
609  crm_trace("ID match: %u = %p", node->id, node);
610  by_id = node;
611  break;
612  }
613  }
614 
615  } else if (uuid != NULL) {
616  g_hash_table_iter_init(&iter, crm_peer_cache);
617  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
618  if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
619  crm_trace("UUID match: %s = %p", node->uuid, node);
620  by_id = node;
621  break;
622  }
623  }
624  }
625 
626  node = by_id; /* Good default */
627  if(by_id == by_name) {
628  /* Nothing to do if they match (both NULL counts) */
629  crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
630 
631  } else if(by_id == NULL && by_name) {
632  crm_trace("Only one: %p for %u/%s", by_name, id, uname);
633 
634  if(id && by_name->id) {
635  dump_peer_hash(LOG_WARNING, __func__);
636  crm_crit("Node %u and %u share the same name '%s'",
637  id, by_name->id, uname);
638  node = NULL; /* Create a new one */
639 
640  } else {
641  node = by_name;
642  }
643 
644  } else if(by_name == NULL && by_id) {
645  crm_trace("Only one: %p for %u/%s", by_id, id, uname);
646 
647  if(uname && by_id->uname) {
648  dump_peer_hash(LOG_WARNING, __func__);
649  crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
650  uname, by_id->uname, id, uname);
651  }
652 
653  } else if(uname && by_id->uname) {
654  if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
655  crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
656  g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
657 
658  } else {
659  crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
660  dump_peer_hash(LOG_INFO, __func__);
661  crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
662  TRUE);
663  }
664 
665  } else if(id && by_name->id) {
666  crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
667 
668  } else {
669  /* Simple merge */
670 
671  /* Only corosync-based clusters use node IDs. The functions that call
672  * pcmk__update_peer_state() and crm_update_peer_proc() only know
673  * nodeid, so 'by_id' is authoritative when merging.
674  */
675  dump_peer_hash(LOG_DEBUG, __func__);
676 
677  crm_info("Merging %p into %p", by_name, by_id);
678  g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
679  }
680 
681  return node;
682 }
683 
684 #if SUPPORT_COROSYNC
685 static guint
686 remove_conflicting_peer(crm_node_t *node)
687 {
688  int matches = 0;
689  GHashTableIter iter;
690  crm_node_t *existing_node = NULL;
691 
692  if (node->id == 0 || node->uname == NULL) {
693  return 0;
694  }
695 
697  return 0;
698  }
699 
700  g_hash_table_iter_init(&iter, crm_peer_cache);
701  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
702  if (existing_node->id > 0
703  && existing_node->id != node->id
704  && existing_node->uname != NULL
705  && strcasecmp(existing_node->uname, node->uname) == 0) {
706 
707  if (crm_is_peer_active(existing_node)) {
708  continue;
709  }
710 
711  crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
712  existing_node->id, existing_node->uname, node->id);
713 
714  g_hash_table_iter_remove(&iter);
715  matches++;
716  }
717  }
718 
719  return matches;
720 }
721 #endif
722 
733 /* coverity[-alloc] Memory is referenced in one or both hashtables */
734 crm_node_t *
735 pcmk__get_peer(unsigned int id, const char *uname, const char *uuid)
736 {
737  crm_node_t *node = NULL;
738  char *uname_lookup = NULL;
739 
740  CRM_ASSERT(id > 0 || uname != NULL);
741 
742  crm_peer_init();
743 
744  node = pcmk__search_cluster_node_cache(id, uname, uuid);
745 
746  /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
747  * we need to do a lookup of the node name using the id in the cluster membership. */
748  if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
749  uname_lookup = get_node_name(id);
750  }
751 
752  if (uname_lookup) {
753  uname = uname_lookup;
754  crm_trace("Inferred a name of '%s' for node %u", uname, id);
755 
756  /* try to turn up the node one more time now that we know the uname. */
757  if (node == NULL) {
758  node = pcmk__search_cluster_node_cache(id, uname, uuid);
759  }
760  }
761 
762 
763  if (node == NULL) {
764  char *uniqueid = crm_generate_uuid();
765 
766  node = calloc(1, sizeof(crm_node_t));
767  CRM_ASSERT(node);
768 
769  crm_info("Created entry %s/%p for node %s/%u (%d total)",
770  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
771  g_hash_table_replace(crm_peer_cache, uniqueid, node);
772  }
773 
774  if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
775  crm_info("Node %u is now known as %s", id, uname);
776  }
777 
778  if(id > 0 && node->id == 0) {
779  node->id = id;
780  }
781 
782  if (uname && (node->uname == NULL)) {
783  update_peer_uname(node, uname);
784  }
785 
786  if(node->uuid == NULL) {
787  if (uuid == NULL) {
788  uuid = crm_peer_uuid(node);
789  }
790 
791  if (uuid) {
792  crm_info("Node %u has uuid %s", id, uuid);
793 
794  } else {
795  crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
796  }
797  }
798 
799  free(uname_lookup);
800 
801  return node;
802 }
803 
812 /* coverity[-alloc] Memory is referenced in one or both hashtables */
813 crm_node_t *
814 crm_get_peer(unsigned int id, const char *uname)
815 {
816  return pcmk__get_peer(id, uname, NULL);
817 }
818 
830 static void
831 update_peer_uname(crm_node_t *node, const char *uname)
832 {
833  CRM_CHECK(uname != NULL,
834  crm_err("Bug: can't update node name without name"); return);
835  CRM_CHECK(node != NULL,
836  crm_err("Bug: can't update node name to %s without node", uname);
837  return);
838 
839  if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
840  crm_debug("Node uname '%s' did not change", uname);
841  return;
842  }
843 
844  for (const char *c = uname; *c; ++c) {
845  if ((*c >= 'A') && (*c <= 'Z')) {
846  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
847  uname);
848  break;
849  }
850  }
851 
852  pcmk__str_update(&node->uname, uname);
853 
854  if (peer_status_callback != NULL) {
855  peer_status_callback(crm_status_uname, node, NULL);
856  }
857 
858 #if SUPPORT_COROSYNC
860  remove_conflicting_peer(node);
861  }
862 #endif
863 }
864 
873 static inline const char *
874 proc2text(enum crm_proc_flag proc)
875 {
876  const char *text = "unknown";
877 
878  switch (proc) {
879  case crm_proc_none:
880  text = "none";
881  break;
882  case crm_proc_based:
883  text = "pacemaker-based";
884  break;
885  case crm_proc_controld:
886  text = "pacemaker-controld";
887  break;
888  case crm_proc_schedulerd:
889  text = "pacemaker-schedulerd";
890  break;
891  case crm_proc_execd:
892  text = "pacemaker-execd";
893  break;
894  case crm_proc_attrd:
895  text = "pacemaker-attrd";
896  break;
897  case crm_proc_fenced:
898  text = "pacemaker-fenced";
899  break;
900  case crm_proc_cpg:
901  text = "corosync-cpg";
902  break;
903  }
904  return text;
905 }
906 
923 crm_node_t *
924 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
925 {
926  uint32_t last = 0;
927  gboolean changed = FALSE;
928 
929  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
930  source, proc2text(flag), status);
931  return NULL);
932 
933  /* Pacemaker doesn't spawn processes on remote nodes */
934  if (pcmk_is_set(node->flags, crm_remote_node)) {
935  return node;
936  }
937 
938  last = node->processes;
939  if (status == NULL) {
940  node->processes = flag;
941  if (node->processes != last) {
942  changed = TRUE;
943  }
944 
945  } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) {
946  if ((node->processes & flag) != flag) {
947  node->processes = pcmk__set_flags_as(__func__, __LINE__,
948  LOG_TRACE, "Peer process",
949  node->uname, node->processes,
950  flag, "processes");
951  changed = TRUE;
952  }
953 
954  } else if (node->processes & flag) {
955  node->processes = pcmk__clear_flags_as(__func__, __LINE__,
956  LOG_TRACE, "Peer process",
957  node->uname, node->processes,
958  flag, "processes");
959  changed = TRUE;
960  }
961 
962  if (changed) {
963  if (status == NULL && flag <= crm_proc_none) {
964  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
965  node->id);
966  } else {
967  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
968  proc2text(flag), status);
969  }
970 
971  if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
972  node->when_online = time(NULL);
973 
974  } else {
975  node->when_online = 0;
976  }
977 
978  /* Call the client callback first, then update the peer state,
979  * in case the node will be reaped
980  */
981  if (peer_status_callback != NULL) {
982  peer_status_callback(crm_status_processes, node, &last);
983  }
984 
985  /* The client callback shouldn't touch the peer caches,
986  * but as a safety net, bail if the peer cache was destroyed.
987  */
988  if (crm_peer_cache == NULL) {
989  return NULL;
990  }
991 
992  if (crm_autoreap) {
993  const char *peer_state = NULL;
994 
995  if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
996  peer_state = CRM_NODE_MEMBER;
997  } else {
998  peer_state = CRM_NODE_LOST;
999  }
1000  node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1001  }
1002  } else {
1003  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
1004  proc2text(flag), status);
1005  }
1006  return node;
1007 }
1008 
1017 void
1018 pcmk__update_peer_expected(const char *source, crm_node_t *node,
1019  const char *expected)
1020 {
1021  char *last = NULL;
1022  gboolean changed = FALSE;
1023 
1024  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
1025  return);
1026 
1027  /* Remote nodes don't participate in joins */
1028  if (pcmk_is_set(node->flags, crm_remote_node)) {
1029  return;
1030  }
1031 
1032  last = node->expected;
1033  if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1034  node->expected = strdup(expected);
1035  changed = TRUE;
1036  }
1037 
1038  if (changed) {
1039  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
1040  expected, last);
1041  free(last);
1042  } else {
1043  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
1044  node->id, expected);
1045  }
1046 }
1047 
1064 static crm_node_t *
1065 update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
1066  uint64_t membership, GHashTableIter *iter)
1067 {
1068  gboolean is_member;
1069 
1070  CRM_CHECK(node != NULL,
1071  crm_err("Could not set state for unknown host to %s"
1072  CRM_XS " source=%s", state, source);
1073  return NULL);
1074 
1075  is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
1076  if (is_member) {
1077  node->when_lost = 0;
1078  if (membership) {
1079  node->last_seen = membership;
1080  }
1081  }
1082 
1083  if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1084  char *last = node->state;
1085 
1086  if (is_member) {
1087  node->when_member = time(NULL);
1088 
1089  } else {
1090  node->when_member = 0;
1091  }
1092 
1093  node->state = strdup(state);
1094  crm_notice("Node %s state is now %s " CRM_XS
1095  " nodeid=%u previous=%s source=%s", node->uname, state,
1096  node->id, (last? last : "unknown"), source);
1097  if (peer_status_callback != NULL) {
1098  peer_status_callback(crm_status_nstate, node, last);
1099  }
1100  free(last);
1101 
1102  if (crm_autoreap && !is_member
1103  && !pcmk_is_set(node->flags, crm_remote_node)) {
1104  /* We only autoreap from the peer cache, not the remote peer cache,
1105  * because the latter should be managed only by
1106  * crm_remote_peer_cache_refresh().
1107  */
1108  if(iter) {
1109  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1110  g_hash_table_iter_remove(iter);
1111 
1112  } else {
1113  reap_crm_member(node->id, node->uname);
1114  }
1115  node = NULL;
1116  }
1117 
1118  } else {
1119  crm_trace("Node %s state is unchanged (%s) " CRM_XS
1120  " nodeid=%u source=%s", node->uname, state, node->id, source);
1121  }
1122  return node;
1123 }
1124 
1140 crm_node_t *
1141 pcmk__update_peer_state(const char *source, crm_node_t *node,
1142  const char *state, uint64_t membership)
1143 {
1144  return update_peer_state_iter(source, node, state, membership, NULL);
1145 }
1146 
1153 void
1154 pcmk__reap_unseen_nodes(uint64_t membership)
1155 {
1156  GHashTableIter iter;
1157  crm_node_t *node = NULL;
1158 
1159  crm_trace("Reaping unseen nodes...");
1160  g_hash_table_iter_init(&iter, crm_peer_cache);
1161  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1162  if (node->last_seen != membership) {
1163  if (node->state) {
1164  /*
1165  * Calling update_peer_state_iter() allows us to
1166  * remove the node from crm_peer_cache without
1167  * invalidating our iterator
1168  */
1169  update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1170  membership, &iter);
1171 
1172  } else {
1173  crm_info("State of node %s[%u] is still unknown",
1174  node->uname, node->id);
1175  }
1176  }
1177  }
1178 }
1179 
1180 static crm_node_t *
1181 find_known_node(const char *id, const char *uname)
1182 {
1183  GHashTableIter iter;
1184  crm_node_t *node = NULL;
1185  crm_node_t *by_id = NULL;
1186  crm_node_t *by_name = NULL;
1187 
1188  if (uname) {
1189  g_hash_table_iter_init(&iter, known_node_cache);
1190  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1191  if (node->uname && strcasecmp(node->uname, uname) == 0) {
1192  crm_trace("Name match: %s = %p", node->uname, node);
1193  by_name = node;
1194  break;
1195  }
1196  }
1197  }
1198 
1199  if (id) {
1200  g_hash_table_iter_init(&iter, known_node_cache);
1201  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1202  if(strcasecmp(node->uuid, id) == 0) {
1203  crm_trace("ID match: %s= %p", id, node);
1204  by_id = node;
1205  break;
1206  }
1207  }
1208  }
1209 
1210  node = by_id; /* Good default */
1211  if (by_id == by_name) {
1212  /* Nothing to do if they match (both NULL counts) */
1213  crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1214 
1215  } else if (by_id == NULL && by_name) {
1216  crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1217 
1218  if (id) {
1219  node = NULL;
1220 
1221  } else {
1222  node = by_name;
1223  }
1224 
1225  } else if (by_name == NULL && by_id) {
1226  crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1227 
1228  if (uname) {
1229  node = NULL;
1230  }
1231 
1232  } else if (uname && by_id->uname
1233  && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1234  /* Multiple nodes have the same uname in the CIB.
1235  * Return by_id. */
1236 
1237  } else if (id && by_name->uuid
1238  && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1239  /* Multiple nodes have the same id in the CIB.
1240  * Return by_name. */
1241  node = by_name;
1242 
1243  } else {
1244  node = NULL;
1245  }
1246 
1247  if (node == NULL) {
1248  crm_debug("Couldn't find node%s%s%s%s",
1249  id? " " : "",
1250  id? id : "",
1251  uname? " with name " : "",
1252  uname? uname : "");
1253  }
1254 
1255  return node;
1256 }
1257 
1258 static void
1259 known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1260 {
1261  const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1262  const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1263  crm_node_t * node = NULL;
1264 
1265  CRM_CHECK(id != NULL && uname !=NULL, return);
1266  node = find_known_node(id, uname);
1267 
1268  if (node == NULL) {
1269  char *uniqueid = crm_generate_uuid();
1270 
1271  node = calloc(1, sizeof(crm_node_t));
1272  CRM_ASSERT(node != NULL);
1273 
1274  node->uname = strdup(uname);
1275  CRM_ASSERT(node->uname != NULL);
1276 
1277  node->uuid = strdup(id);
1278  CRM_ASSERT(node->uuid != NULL);
1279 
1280  g_hash_table_replace(known_node_cache, uniqueid, node);
1281 
1282  } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1283  pcmk__str_update(&node->uname, uname);
1284 
1285  /* Node is in cache and hasn't been updated already, so mark it clean */
1287  }
1288 
1289 }
1290 
1291 static void
1292 refresh_known_node_cache(xmlNode *cib)
1293 {
1294  crm_peer_init();
1295 
1296  g_hash_table_foreach(known_node_cache, mark_dirty, NULL);
1297 
1299  known_node_cache_refresh_helper, NULL);
1300 
1301  /* Remove all old cache entries that weren't seen in the CIB */
1302  g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL);
1303 }
1304 
1305 void
1307 {
1309  refresh_known_node_cache(cib);
1310 }
1311 
1322 crm_node_t *
1323 pcmk__search_known_node_cache(unsigned int id, const char *uname,
1324  uint32_t flags)
1325 {
1326  crm_node_t *node = NULL;
1327  char *id_str = NULL;
1328 
1329  CRM_ASSERT(id > 0 || uname != NULL);
1330 
1331  node = pcmk__search_node_caches(id, uname, flags);
1332 
1333  if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1334  return node;
1335  }
1336 
1337  if (id > 0) {
1338  id_str = crm_strdup_printf("%u", id);
1339  }
1340 
1341  node = find_known_node(id_str, uname);
1342 
1343  free(id_str);
1344  return node;
1345 }
1346 
1347 
1348 // Deprecated functions kept only for backward API compatibility
1349 // LCOV_EXCL_START
1350 
1351 #include <crm/cluster/compat.h>
1352 
1353 int
1354 crm_terminate_member(int nodeid, const char *uname, void *unused)
1355 {
1356  return stonith_api_kick(nodeid, uname, 120, TRUE);
1357 }
1358 
1359 int
1360 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1361 {
1362  return stonith_api_kick(nodeid, uname, 120, TRUE);
1363 }
1364 
1365 // LCOV_EXCL_STOP
1366 // End deprecated API
#define LOG_TRACE
Definition: logging.h:38
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:238
#define crm_notice(fmt, args...)
Definition: logging.h:383
#define CRM_NODE_LOST
Definition: cluster.h:32
GHashTable * crm_peer_cache
Definition: membership.c:36
#define crm_crit(fmt, args...)
Definition: logging.h:380
char data[0]
Definition: cpg.c:55
char * crm_generate_uuid(void)
Definition: utils.c:509
uint64_t flags
Definition: cluster.h:62
void crm_peer_destroy(void)
Definition: membership.c:417
const char * name
Definition: cib.c:26
uint32_t id
Definition: cluster.h:72
char * uuid
Definition: cluster.h:60
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:1971
void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:1018
gboolean crm_have_quorum
Definition: membership.c:64
crm_node_t * pcmk__get_peer(unsigned int id, const char *uname, const char *uuid)
Get a cluster node cache entry.
Definition: membership.c:735
crm_node_t * pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, int flags)
Get a node cache entry (cluster or Pacemaker Remote)
Definition: membership.c:536
crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership)
Update a node's state and membership information.
Definition: membership.c:1141
GHashTable * crm_remote_peer_cache
Definition: membership.c:53
unsigned long long crm_peer_seq
Definition: membership.c:63
char * get_node_name(uint32_t nodeid)
Get the node name corresponding to a cluster node ID.
Definition: cluster.c:204
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:471
void crm_peer_init(void)
Definition: membership.c:401
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:147
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Check whether a Corosync cluster peer is active.
Definition: corosync.c:531
int crm_remote_peer_cache_size(void)
Definition: membership.c:87
#define crm_warn(fmt, args...)
Definition: logging.h:382
#define PCMK__XP_GUEST_NODE_CONFIG
Definition: xml_internal.h:176
uint32_t processes
Definition: cluster.h:64
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Get a node cache entry (cluster or Pacemaker Remote)
Definition: membership.c:565
#define clear_peer_flags(peer, flags_to_clear)
Definition: membership.c:76
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:334
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:282
#define crm_debug(fmt, args...)
Definition: logging.h:386
void pcmk__reap_unseen_nodes(uint64_t membership)
Definition: membership.c:1154
#define XML_ATTR_ID
Definition: msg_xml.h:156
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:447
time_t when_lost
Definition: cluster.h:73
#define PCMK__XA_IN_CCM
Definition: crm_internal.h:88
crm_status_type
Definition: cluster.h:181
#define crm_trace(fmt, args...)
Definition: logging.h:387
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:175
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:99
time_t when_member
Definition: cluster.h:82
void pcmk__refresh_node_caches_from_cib(xmlNode *cib)
Definition: membership.c:1306
int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value)
Definition: nvpair.c:878
#define PCMK__XP_REMOTE_NODE_CONFIG
Definition: xml_internal.h:182
void pcmk__str_update(char **str, const char *value)
Definition: strings.c:1193
#define XML_ATTR_UNAME
Definition: msg_xml.h:178
#define CRM_NODE_MEMBER
Definition: cluster.h:33
time_t when_online
Definition: cluster.h:83
crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
Definition: membership.c:506
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:454
const char * name_for_cluster_type(enum cluster_type_e type)
Get a log-friendly string equivalent of a cluster type.
Definition: cluster.c:304
uint32_t id
Definition: cpg.c:45
crm_node_t * pcmk__search_known_node_cache(unsigned int id, const char *uname, uint32_t flags)
Definition: membership.c:1323
Deprecated Pacemaker cluster API.
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1354
char * expected
Definition: cluster.h:77
gboolean is_corosync_cluster(void)
Check whether the local cluster is a Corosync cluster.
Definition: cluster.c:389
#define CRM_XS
Definition: logging.h:56
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:244
guint crm_active_peers(void)
Definition: membership.c:375
crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid)
Definition: membership.c:582
#define PCMK__XP_REMOTE_NODE_STATUS
Definition: xml_internal.h:187
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:107
pcmk__action_result_t result
Definition: pcmk_fence.c:35
#define crm_err(fmt, args...)
Definition: logging.h:381
#define CRM_ASSERT(expr)
Definition: results.h:42
Fencing aka. STONITH.
char * conn_host
Definition: cluster.h:80
char uname[MAX_NAME]
Definition: cpg.c:50
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1360
char * state
Definition: cluster.h:61
#define pcmk__plural_s(i)
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:170
bool pcmk__corosync_has_nodelist(void)
Definition: corosync.c:730
IPC interface to Pacemaker daemons.
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:924
#define set_peer_flags(peer, flags_to_set)
Definition: membership.c:69
char * uname
Definition: cluster.h:59
uint64_t last_seen
Definition: cluster.h:63
#define ONLINESTATUS
Definition: util.h:37
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:397
#define PCMK__XP_MEMBER_NODE_CONFIG
Definition: xml_internal.h:171
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Get a cluster node cache entry.
Definition: membership.c:814
#define crm_info(fmt, args...)
Definition: logging.h:384
const char * crm_peer_uuid(crm_node_t *node)
Get (and set if needed) a node's UUID.
Definition: cluster.c:38
crm_proc_flag
Definition: internal.h:17
uint64_t flags
Definition: remote.c:215
GHashTable * pcmk__strikey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:646
enum cluster_type_e get_cluster_type(void)
Get (and validate) the local cluster type.
Definition: cluster.c:325