pacemaker  2.0.5-ba59be712
Scalable High-Availability cluster resource manager
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2020 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #ifndef _GNU_SOURCE
13 # define _GNU_SOURCE
14 #endif
15 
16 #include <sys/param.h>
17 #include <sys/types.h>
18 #include <stdio.h>
19 #include <unistd.h>
20 #include <string.h>
21 #include <glib.h>
22 #include <crm/common/ipc.h>
23 #include <crm/cluster/internal.h>
24 #include <crm/msg_xml.h>
25 #include <crm/stonith-ng.h>
26 
27 /* The peer cache remembers cluster nodes that have been seen.
28  * This is managed mostly automatically by libcluster, based on
29  * cluster membership events.
30  *
31  * Because cluster nodes can have conflicting names or UUIDs,
32  * the hash table key is a uniquely generated ID.
33  */
34 GHashTable *crm_peer_cache = NULL;
35 
36 /*
37  * The remote peer cache tracks pacemaker_remote nodes. While the
38  * value has the same type as the peer cache's, it is tracked separately for
39  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
40  * so the name (which is also the UUID) is used as the hash table key; there
41  * is no equivalent of membership events, so management is not automatic; and
42  * most users of the peer cache need to exclude pacemaker_remote nodes.
43  *
44  * That said, using a single cache would be more logical and less error-prone,
45  * so it would be a good idea to merge them one day.
46  *
47  * libcluster provides two avenues for populating the cache:
48  * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
49  * while crm_remote_peer_cache_refresh() populates it via the CIB.
50  */
51 GHashTable *crm_remote_peer_cache = NULL;
52 
53 GHashTable *crm_known_peer_cache = NULL;
54 
55 unsigned long long crm_peer_seq = 0;
56 gboolean crm_have_quorum = FALSE;
57 static gboolean crm_autoreap = TRUE;
58 
59 int
61 {
62  if (crm_remote_peer_cache == NULL) {
63  return 0;
64  }
65  return g_hash_table_size(crm_remote_peer_cache);
66 }
67 
79 crm_node_t *
80 crm_remote_peer_get(const char *node_name)
81 {
82  crm_node_t *node;
83 
84  if (node_name == NULL) {
85  errno = -EINVAL;
86  return NULL;
87  }
88 
89  /* Return existing cache entry if one exists */
90  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
91  if (node) {
92  return node;
93  }
94 
95  /* Allocate a new entry */
96  node = calloc(1, sizeof(crm_node_t));
97  if (node == NULL) {
98  return NULL;
99  }
100 
101  /* Populate the essential information */
103  node->uuid = strdup(node_name);
104  if (node->uuid == NULL) {
105  free(node);
106  errno = -ENOMEM;
107  return NULL;
108  }
109 
110  /* Add the new entry to the cache */
111  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
112  crm_trace("added %s to remote cache", node_name);
113 
114  /* Update the entry's uname, ensuring peer status callbacks are called */
115  crm_update_peer_uname(node, node_name);
116  return node;
117 }
118 
119 void
120 crm_remote_peer_cache_remove(const char *node_name)
121 {
122  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
123  crm_trace("removed %s from remote peer cache", node_name);
124  }
125 }
126 
138 static const char *
139 remote_state_from_cib(xmlNode *node_state)
140 {
141  const char *status;
142 
143  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
144  if (status && !crm_is_true(status)) {
145  status = CRM_NODE_LOST;
146  } else {
147  status = CRM_NODE_MEMBER;
148  }
149  return status;
150 }
151 
152 /* user data for looping through remote node xpath searches */
153 struct refresh_data {
154  const char *field; /* XML attribute to check for node name */
155  gboolean has_state; /* whether to update node state based on XML */
156 };
157 
165 static void
166 remote_cache_refresh_helper(xmlNode *result, void *user_data)
167 {
168  struct refresh_data *data = user_data;
169  const char *remote = crm_element_value(result, data->field);
170  const char *state = NULL;
171  crm_node_t *node;
172 
173  CRM_CHECK(remote != NULL, return);
174 
175  /* Determine node's state, if the result has it */
176  if (data->has_state) {
177  state = remote_state_from_cib(result);
178  }
179 
180  /* Check whether cache already has entry for node */
181  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
182 
183  if (node == NULL) {
184  /* Node is not in cache, so add a new entry for it */
185  node = crm_remote_peer_get(remote);
186  CRM_ASSERT(node);
187  if (state) {
188  crm_update_peer_state(__func__, node, state, 0);
189  }
190 
191  } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
192  /* Node is in cache and hasn't been updated already, so mark it clean */
194  if (state) {
195  crm_update_peer_state(__func__, node, state, 0);
196  }
197  }
198 }
199 
200 static void
201 mark_dirty(gpointer key, gpointer value, gpointer user_data)
202 {
204 }
205 
206 static gboolean
207 is_dirty(gpointer key, gpointer value, gpointer user_data)
208 {
209  return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
210 }
211 
212 /* search string to find CIB resources entries for guest nodes */
213 #define XPATH_GUEST_NODE_CONFIG \
214  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
215  "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
216  "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
217 
218 /* search string to find CIB resources entries for remote nodes */
219 #define XPATH_REMOTE_NODE_CONFIG \
220  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
221  "[@type='remote'][@provider='pacemaker']"
222 
223 /* search string to find CIB node status entries for pacemaker_remote nodes */
224 #define XPATH_REMOTE_NODE_STATUS \
225  "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
226  "[@" XML_NODE_IS_REMOTE "='true']"
227 
233 void
235 {
236  struct refresh_data data;
237 
238  crm_peer_init();
239 
240  /* First, we mark all existing cache entries as dirty,
241  * so that later we can remove any that weren't in the CIB.
242  * We don't empty the cache, because we need to detect changes in state.
243  */
244  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
245 
246  /* Look for guest nodes and remote nodes in the status section */
247  data.field = "id";
248  data.has_state = TRUE;
250  remote_cache_refresh_helper, &data);
251 
252  /* Look for guest nodes and remote nodes in the configuration section,
253  * because they may have just been added and not have a status entry yet.
254  * In that case, the cached node state will be left NULL, so that the
255  * peer status callback isn't called until we're sure the node started
256  * successfully.
257  */
258  data.field = "value";
259  data.has_state = FALSE;
261  remote_cache_refresh_helper, &data);
262  data.field = "id";
263  data.has_state = FALSE;
265  remote_cache_refresh_helper, &data);
266 
267  /* Remove all old cache entries that weren't seen in the CIB */
268  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
269 }
270 
271 gboolean
273 {
274  if(node == NULL) {
275  return FALSE;
276  }
277 
278  if (pcmk_is_set(node->flags, crm_remote_node)) {
279  /* remote nodes are never considered active members. This
280  * guarantees they will never be considered for DC membership.*/
281  return FALSE;
282  }
283 #if SUPPORT_COROSYNC
284  if (is_corosync_cluster()) {
285  return crm_is_corosync_peer_active(node);
286  }
287 #endif
288  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
289  return FALSE;
290 }
291 
292 static gboolean
293 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
294 {
295  crm_node_t *node = value;
296  crm_node_t *search = user_data;
297 
298  if (search == NULL) {
299  return FALSE;
300 
301  } else if (search->id && node->id != search->id) {
302  return FALSE;
303 
304  } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
305  return FALSE;
306 
307  } else if (crm_is_peer_active(value) == FALSE) {
308  crm_info("Removing node with name %s and id %u from membership cache",
309  (node->uname? node->uname : "unknown"), node->id);
310  return TRUE;
311  }
312  return FALSE;
313 }
314 
323 guint
324 reap_crm_member(uint32_t id, const char *name)
325 {
326  int matches = 0;
327  crm_node_t search;
328 
329  if (crm_peer_cache == NULL) {
330  crm_trace("Membership cache not initialized, ignoring purge request");
331  return 0;
332  }
333 
334  search.id = id;
335  search.uname = name ? strdup(name) : NULL;
336  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
337  if(matches) {
338  crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
339  matches, pcmk__plural_s(matches), search.id,
340  (search.uname? " and/or uname=" : ""),
341  (search.uname? search.uname : ""));
342 
343  } else {
344  crm_info("No peers with id=%u%s%s to purge from the membership cache",
345  search.id, (search.uname? " and/or uname=" : ""),
346  (search.uname? search.uname : ""));
347  }
348 
349  free(search.uname);
350  return matches;
351 }
352 
353 static void
354 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
355 {
356  guint *count = user_data;
357  crm_node_t *node = value;
358 
359  if (crm_is_peer_active(node)) {
360  *count = *count + 1;
361  }
362 }
363 
364 guint
366 {
367  guint count = 0;
368 
369  if (crm_peer_cache) {
370  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
371  }
372  return count;
373 }
374 
375 static void
376 destroy_crm_node(gpointer data)
377 {
378  crm_node_t *node = data;
379 
380  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
381 
382  free(node->uname);
383  free(node->state);
384  free(node->uuid);
385  free(node->expected);
386  free(node);
387 }
388 
389 void
391 {
392  if (crm_peer_cache == NULL) {
393  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
394  }
395 
396  if (crm_remote_peer_cache == NULL) {
397  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
398  }
399 
400  if (crm_known_peer_cache == NULL) {
401  crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
402  }
403 }
404 
405 void
407 {
408  if (crm_peer_cache != NULL) {
409  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
410  g_hash_table_destroy(crm_peer_cache);
411  crm_peer_cache = NULL;
412  }
413 
414  if (crm_remote_peer_cache != NULL) {
415  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
416  g_hash_table_destroy(crm_remote_peer_cache);
417  crm_remote_peer_cache = NULL;
418  }
419 
420  if (crm_known_peer_cache != NULL) {
421  crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
422  g_hash_table_destroy(crm_known_peer_cache);
423  crm_known_peer_cache = NULL;
424  }
425 
426 }
427 
428 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
429 
440 void
441 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
442 {
443  crm_status_callback = dispatch;
444 }
445 
457 void
458 crm_set_autoreap(gboolean autoreap)
459 {
460  crm_autoreap = autoreap;
461 }
462 
463 static void crm_dump_peer_hash(int level, const char *caller)
464 {
465  GHashTableIter iter;
466  const char *id = NULL;
467  crm_node_t *node = NULL;
468 
469  g_hash_table_iter_init(&iter, crm_peer_cache);
470  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
471  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
472  }
473 }
474 
475 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
476 {
477  if(value == user_data) {
478  return TRUE;
479  }
480  return FALSE;
481 }
482 
483 crm_node_t *
484 crm_find_peer_full(unsigned int id, const char *uname, int flags)
485 {
486  crm_node_t *node = NULL;
487 
488  CRM_ASSERT(id > 0 || uname != NULL);
489 
490  crm_peer_init();
491 
492  if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
493  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
494  }
495 
496  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
497  node = crm_find_peer(id, uname);
498  }
499  return node;
500 }
501 
502 crm_node_t *
503 crm_get_peer_full(unsigned int id, const char *uname, int flags)
504 {
505  crm_node_t *node = NULL;
506 
507  CRM_ASSERT(id > 0 || uname != NULL);
508 
509  crm_peer_init();
510 
511  if (flags & CRM_GET_PEER_REMOTE) {
512  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
513  }
514 
515  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
516  node = crm_get_peer(id, uname);
517  }
518  return node;
519 }
520 
521 crm_node_t *
522 crm_find_peer(unsigned int id, const char *uname)
523 {
524  GHashTableIter iter;
525  crm_node_t *node = NULL;
526  crm_node_t *by_id = NULL;
527  crm_node_t *by_name = NULL;
528 
529  CRM_ASSERT(id > 0 || uname != NULL);
530 
531  crm_peer_init();
532 
533  if (uname != NULL) {
534  g_hash_table_iter_init(&iter, crm_peer_cache);
535  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
536  if(node->uname && strcasecmp(node->uname, uname) == 0) {
537  crm_trace("Name match: %s = %p", node->uname, node);
538  by_name = node;
539  break;
540  }
541  }
542  }
543 
544  if (id > 0) {
545  g_hash_table_iter_init(&iter, crm_peer_cache);
546  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
547  if(node->id == id) {
548  crm_trace("ID match: %u = %p", node->id, node);
549  by_id = node;
550  break;
551  }
552  }
553  }
554 
555  node = by_id; /* Good default */
556  if(by_id == by_name) {
557  /* Nothing to do if they match (both NULL counts) */
558  crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
559 
560  } else if(by_id == NULL && by_name) {
561  crm_trace("Only one: %p for %u/%s", by_name, id, uname);
562 
563  if(id && by_name->id) {
564  crm_dump_peer_hash(LOG_WARNING, __func__);
565  crm_crit("Node %u and %u share the same name '%s'",
566  id, by_name->id, uname);
567  node = NULL; /* Create a new one */
568 
569  } else {
570  node = by_name;
571  }
572 
573  } else if(by_name == NULL && by_id) {
574  crm_trace("Only one: %p for %u/%s", by_id, id, uname);
575 
576  if(uname && by_id->uname) {
577  crm_dump_peer_hash(LOG_WARNING, __func__);
578  crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
579  uname, by_id->uname, id, uname);
580  }
581 
582  } else if(uname && by_id->uname) {
583  if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
584  crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
585  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
586 
587  } else {
588  crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
589  crm_dump_peer_hash(LOG_INFO, __func__);
590  crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
591  TRUE);
592  }
593 
594  } else if(id && by_name->id) {
595  crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
596 
597  } else {
598  /* Simple merge */
599 
600  /* Only corosync-based clusters use node IDs. The functions that call
601  * crm_update_peer_state() and crm_update_peer_proc() only know nodeid,
602  * so 'by_id' is authoritative when merging.
603  */
604  crm_dump_peer_hash(LOG_DEBUG, __func__);
605 
606  crm_info("Merging %p into %p", by_name, by_id);
607  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
608  }
609 
610  return node;
611 }
612 
#if SUPPORT_COROSYNC
/*
 * Remove inactive peer cache entries that have the same name as a node but
 * a different nonzero ID.
 *
 * node: Node whose id and uname are compared against the cache
 *
 * Nothing is removed if the node lacks an ID or a name, or if
 * corosync_cmap_has_config("nodelist") returns nonzero. Entries that are
 * active peers are always kept.
 *
 * Returns the number of entries removed.
 */
static guint
crm_remove_conflicting_peer(crm_node_t *node)
{
    int removed = 0;
    GHashTableIter iter;
    crm_node_t *cached = NULL;

    if ((node->id == 0) || (node->uname == NULL)
        || (corosync_cmap_has_config("nodelist") != 0)) {
        return 0;
    }

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &cached)) {
        /* Skip entries that don't conflict with the node */
        if ((cached->id == 0) || (cached->id == node->id)
            || (cached->uname == NULL)
            || (strcasecmp(cached->uname, node->uname) != 0)) {
            continue;
        }

        /* Never drop a peer that is still active */
        if (crm_is_peer_active(cached)) {
            continue;
        }

        crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                 cached->id, cached->uname, node->id);

        g_hash_table_iter_remove(&iter);
        removed++;
    }

    return removed;
}
#endif
651 
652 /* coverity[-alloc] Memory is referenced in one or both hashtables */
653 crm_node_t *
654 crm_get_peer(unsigned int id, const char *uname)
655 {
656  crm_node_t *node = NULL;
657  char *uname_lookup = NULL;
658 
659  CRM_ASSERT(id > 0 || uname != NULL);
660 
661  crm_peer_init();
662 
663  node = crm_find_peer(id, uname);
664 
665  /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
666  * we need to do a lookup of the node name using the id in the cluster membership. */
667  if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
668  uname_lookup = get_node_name(id);
669  }
670 
671  if (uname_lookup) {
672  uname = uname_lookup;
673  crm_trace("Inferred a name of '%s' for node %u", uname, id);
674 
675  /* try to turn up the node one more time now that we know the uname. */
676  if (node == NULL) {
677  node = crm_find_peer(id, uname);
678  }
679  }
680 
681 
682  if (node == NULL) {
683  char *uniqueid = crm_generate_uuid();
684 
685  node = calloc(1, sizeof(crm_node_t));
686  CRM_ASSERT(node);
687 
688  crm_info("Created entry %s/%p for node %s/%u (%d total)",
689  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
690  g_hash_table_replace(crm_peer_cache, uniqueid, node);
691  }
692 
693  if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
694  crm_info("Node %u is now known as %s", id, uname);
695  }
696 
697  if(id > 0 && node->id == 0) {
698  node->id = id;
699  }
700 
701  if (uname && (node->uname == NULL)) {
703  }
704 
705  if(node->uuid == NULL) {
706  const char *uuid = crm_peer_uuid(node);
707 
708  if (uuid) {
709  crm_info("Node %u has uuid %s", id, uuid);
710 
711  } else {
712  crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
713  }
714  }
715 
716  free(uname_lookup);
717 
718  return node;
719 }
720 
732 void
734 {
735  CRM_CHECK(uname != NULL,
736  crm_err("Bug: can't update node name without name"); return);
737  CRM_CHECK(node != NULL,
738  crm_err("Bug: can't update node name to %s without node", uname);
739  return);
740 
741  if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
742  crm_debug("Node uname '%s' did not change", uname);
743  return;
744  }
745 
746  for (const char *c = uname; *c; ++c) {
747  if ((*c >= 'A') && (*c <= 'Z')) {
748  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
749  uname);
750  break;
751  }
752  }
753 
754  free(node->uname);
755  node->uname = strdup(uname);
756  CRM_ASSERT(node->uname != NULL);
757 
758  if (crm_status_callback) {
760  }
761 
762 #if SUPPORT_COROSYNC
764  crm_remove_conflicting_peer(node);
765  }
766 #endif
767 }
768 
785 crm_node_t *
786 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
787 {
788  uint32_t last = 0;
789  gboolean changed = FALSE;
790 
791  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
792  source, peer2text(flag), status); return NULL);
793 
794  /* Pacemaker doesn't spawn processes on remote nodes */
795  if (pcmk_is_set(node->flags, crm_remote_node)) {
796  return node;
797  }
798 
799  last = node->processes;
800  if (status == NULL) {
801  node->processes = flag;
802  if (node->processes != last) {
803  changed = TRUE;
804  }
805 
806  } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) {
807  if ((node->processes & flag) != flag) {
808  node->processes = pcmk__set_flags_as(__func__, __LINE__,
809  LOG_TRACE, "Peer process",
810  node->uname, node->processes,
811  flag, "processes");
812  changed = TRUE;
813  }
814 
815  } else if (node->processes & flag) {
816  node->processes = pcmk__clear_flags_as(__func__, __LINE__,
817  LOG_TRACE, "Peer process",
818  node->uname, node->processes,
819  flag, "processes");
820  changed = TRUE;
821  }
822 
823  if (changed) {
824  if (status == NULL && flag <= crm_proc_none) {
825  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
826  node->id);
827  } else {
828  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
829  peer2text(flag), status);
830  }
831 
832  /* Call the client callback first, then update the peer state,
833  * in case the node will be reaped
834  */
835  if (crm_status_callback) {
837  }
838 
839  /* The client callback shouldn't touch the peer caches,
840  * but as a safety net, bail if the peer cache was destroyed.
841  */
842  if (crm_peer_cache == NULL) {
843  return NULL;
844  }
845 
846  if (crm_autoreap) {
847  const char *peer_state = NULL;
848 
849  if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
850  peer_state = CRM_NODE_MEMBER;
851  } else {
852  peer_state = CRM_NODE_LOST;
853  }
854  node = crm_update_peer_state(__func__, node, peer_state, 0);
855  }
856  } else {
857  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
858  peer2text(flag), status);
859  }
860  return node;
861 }
862 
863 void
864 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
865 {
866  char *last = NULL;
867  gboolean changed = FALSE;
868 
869  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
870  return);
871 
872  /* Remote nodes don't participate in joins */
873  if (pcmk_is_set(node->flags, crm_remote_node)) {
874  return;
875  }
876 
877  last = node->expected;
878  if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
879  node->expected = strdup(expected);
880  changed = TRUE;
881  }
882 
883  if (changed) {
884  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
885  expected, last);
886  free(last);
887  } else {
888  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
889  node->id, expected);
890  }
891 }
892 
909 static crm_node_t *
910 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, uint64_t membership, GHashTableIter *iter)
911 {
912  gboolean is_member;
913 
914  CRM_CHECK(node != NULL,
915  crm_err("Could not set state for unknown host to %s"
916  CRM_XS " source=%s", state, source);
917  return NULL);
918 
919  is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
920  if (is_member) {
921  node->when_lost = 0;
922  if (membership) {
923  node->last_seen = membership;
924  }
925  }
926 
927  if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
928  char *last = node->state;
929 
930  node->state = strdup(state);
931  crm_notice("Node %s state is now %s " CRM_XS
932  " nodeid=%u previous=%s source=%s", node->uname, state,
933  node->id, (last? last : "unknown"), source);
934  if (crm_status_callback) {
936  }
937  free(last);
938 
939  if (crm_autoreap && !is_member
940  && !pcmk_is_set(node->flags, crm_remote_node)) {
941  /* We only autoreap from the peer cache, not the remote peer cache,
942  * because the latter should be managed only by
943  * crm_remote_peer_cache_refresh().
944  */
945  if(iter) {
946  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
947  g_hash_table_iter_remove(iter);
948 
949  } else {
950  reap_crm_member(node->id, node->uname);
951  }
952  node = NULL;
953  }
954 
955  } else {
956  crm_trace("Node %s state is unchanged (%s) " CRM_XS
957  " nodeid=%u source=%s", node->uname, state, node->id, source);
958  }
959  return node;
960 }
961 
977 crm_node_t *
978 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, uint64_t membership)
979 {
980  return crm_update_peer_state_iter(source, node, state, membership, NULL);
981 }
982 
989 void
990 crm_reap_unseen_nodes(uint64_t membership)
991 {
992  GHashTableIter iter;
993  crm_node_t *node = NULL;
994 
995  crm_trace("Reaping unseen nodes...");
996  g_hash_table_iter_init(&iter, crm_peer_cache);
997  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
998  if (node->last_seen != membership) {
999  if (node->state) {
1000  /*
1001  * Calling crm_update_peer_state_iter() allows us to
1002  * remove the node from crm_peer_cache without
1003  * invalidating our iterator
1004  */
1005  crm_update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1006  membership, &iter);
1007 
1008  } else {
1009  crm_info("State of node %s[%u] is still unknown",
1010  node->uname, node->id);
1011  }
1012  }
1013  }
1014 }
1015 
1016 int
1017 crm_terminate_member(int nodeid, const char *uname, void *unused)
1018 {
1019  /* Always use the synchronous, non-mainloop version */
1020  return stonith_api_kick(nodeid, uname, 120, TRUE);
1021 }
1022 
1023 int
1024 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1025 {
1026  return stonith_api_kick(nodeid, uname, 120, TRUE);
1027 }
1028 
1029 static crm_node_t *
1030 crm_find_known_peer(const char *id, const char *uname)
1031 {
1032  GHashTableIter iter;
1033  crm_node_t *node = NULL;
1034  crm_node_t *by_id = NULL;
1035  crm_node_t *by_name = NULL;
1036 
1037  if (uname) {
1038  g_hash_table_iter_init(&iter, crm_known_peer_cache);
1039  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1040  if (node->uname && strcasecmp(node->uname, uname) == 0) {
1041  crm_trace("Name match: %s = %p", node->uname, node);
1042  by_name = node;
1043  break;
1044  }
1045  }
1046  }
1047 
1048  if (id) {
1049  g_hash_table_iter_init(&iter, crm_known_peer_cache);
1050  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1051  if(strcasecmp(node->uuid, id) == 0) {
1052  crm_trace("ID match: %s= %p", id, node);
1053  by_id = node;
1054  break;
1055  }
1056  }
1057  }
1058 
1059  node = by_id; /* Good default */
1060  if (by_id == by_name) {
1061  /* Nothing to do if they match (both NULL counts) */
1062  crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1063 
1064  } else if (by_id == NULL && by_name) {
1065  crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1066 
1067  if (id) {
1068  node = NULL;
1069 
1070  } else {
1071  node = by_name;
1072  }
1073 
1074  } else if (by_name == NULL && by_id) {
1075  crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1076 
1077  if (uname) {
1078  node = NULL;
1079  }
1080 
1081  } else if (uname && by_id->uname
1082  && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1083  /* Multiple nodes have the same uname in the CIB.
1084  * Return by_id. */
1085 
1086  } else if (id && by_name->uuid
1087  && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1088  /* Multiple nodes have the same id in the CIB.
1089  * Return by_name. */
1090  node = by_name;
1091 
1092  } else {
1093  node = NULL;
1094  }
1095 
1096  if (node == NULL) {
1097  crm_debug("Couldn't find node%s%s%s%s",
1098  id? " " : "",
1099  id? id : "",
1100  uname? " with name " : "",
1101  uname? uname : "");
1102  }
1103 
1104  return node;
1105 }
1106 
1107 static void
1108 known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1109 {
1110  const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1111  const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1112  crm_node_t * node = NULL;
1113 
1114  CRM_CHECK(id != NULL && uname !=NULL, return);
1115  node = crm_find_known_peer(id, uname);
1116 
1117  if (node == NULL) {
1118  char *uniqueid = crm_generate_uuid();
1119 
1120  node = calloc(1, sizeof(crm_node_t));
1121  CRM_ASSERT(node != NULL);
1122 
1123  node->uname = strdup(uname);
1124  CRM_ASSERT(node->uname != NULL);
1125 
1126  node->uuid = strdup(id);
1127  CRM_ASSERT(node->uuid != NULL);
1128 
1129  g_hash_table_replace(crm_known_peer_cache, uniqueid, node);
1130 
1131  } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1132  if (!pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
1133  free(node->uname);
1134  node->uname = strdup(uname);
1135  CRM_ASSERT(node->uname != NULL);
1136  }
1137 
1138  /* Node is in cache and hasn't been updated already, so mark it clean */
1140  }
1141 
1142 }
1143 
1144 #define XPATH_MEMBER_NODE_CONFIG \
1145  "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
1146  "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
1147 
1148 static void
1149 crm_known_peer_cache_refresh(xmlNode *cib)
1150 {
1151  crm_peer_init();
1152 
1153  g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
1154 
1156  known_peer_cache_refresh_helper, NULL);
1157 
1158  /* Remove all old cache entries that weren't seen in the CIB */
1159  g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
1160 }
1161 
1162 void
1164 {
1166  crm_known_peer_cache_refresh(cib);
1167 }
1168 
1169 crm_node_t *
1170 crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
1171 {
1172  crm_node_t *node = NULL;
1173  char *id_str = NULL;
1174 
1175  CRM_ASSERT(id > 0 || uname != NULL);
1176 
1177  node = crm_find_peer_full(id, uname, flags);
1178 
1179  if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1180  return node;
1181  }
1182 
1183  if (id > 0) {
1184  id_str = crm_strdup_printf("%u", id);
1185  }
1186 
1187  node = crm_find_known_peer(id_str, uname);
1188 
1189  free(id_str);
1190  return node;
1191 }
#define LOG_TRACE
Definition: logging.h:36
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:215
char uname[MAX_NAME]
Definition: internal.h:85
void crm_reap_unseen_nodes(uint64_t membership)
Definition: membership.c:990
#define crm_notice(fmt, args...)
Definition: logging.h:349
#define CRM_NODE_LOST
Definition: cluster.h:33
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:224
GHashTable * crm_peer_cache
Definition: membership.c:34
#define crm_crit(fmt, args...)
Definition: logging.h:346
char * crm_generate_uuid(void)
Definition: utils.c:498
uint64_t flags
Definition: cluster.h:60
void crm_peer_destroy(void)
Definition: membership.c:406
uint32_t id
Definition: cluster.h:65
char * uuid
Definition: cluster.h:58
#define pcmk__clear_peer_flags(peer, flags_to_clear)
Definition: internal.h:22
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2255
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:522
crm_node_t * crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:1170
gboolean crm_have_quorum
Definition: membership.c:56
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:484
GHashTable * crm_remote_peer_cache
Definition: membership.c:51
#define pcmk__set_peer_flags(peer, flags_to_set)
Definition: internal.h:15
unsigned long long crm_peer_seq
Definition: membership.c:55
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:132
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:458
void crm_peer_caches_refresh(xmlNode *cib)
Definition: membership.c:1163
void crm_peer_init(void)
Definition: membership.c:390
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:120
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:512
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership)
Update a node's state and membership information.
Definition: membership.c:978
uint32_t id
Definition: internal.h:80
int crm_remote_peer_cache_size(void)
Definition: membership.c:60
#define crm_warn(fmt, args...)
Definition: logging.h:348
uint32_t processes
Definition: cluster.h:62
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:503
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:324
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:272
#define crm_debug(fmt, args...)
Definition: logging.h:352
#define XML_ATTR_ID
Definition: msg_xml.h:96
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:523
time_t when_lost
Definition: cluster.h:66
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:213
crm_status_type
Definition: cluster.h:166
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:864
#define crm_trace(fmt, args...)
Definition: logging.h:353
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:156
guint crm_strcase_hash(gconstpointer v)
Definition: strings.c:569
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:196
#define XML_ATTR_UNAME
Definition: msg_xml.h:118
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:704
#define CRM_NODE_MEMBER
Definition: cluster.h:34
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:733
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:441
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:236
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1017
char * expected
Definition: cluster.h:70
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:428
gboolean is_corosync_cluster(void)
Definition: cluster.c:330
#define CRM_XS
Definition: logging.h:54
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:234
guint crm_active_peers(void)
Definition: membership.c:365
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:80
#define crm_err(fmt, args...)
Definition: logging.h:347
#define CRM_ASSERT(expr)
Definition: results.h:42
Fencing (also known as STONITH).
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1024
char data[0]
Definition: internal.h:90
char * state
Definition: cluster.h:59
#define pcmk__plural_s(i)
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:172
IPC interface to Pacemaker daemons.
GHashTable * crm_known_peer_cache
Definition: membership.c:53
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:786
char * uname
Definition: cluster.h:57
uint64_t last_seen
Definition: cluster.h:61
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:247
gboolean crm_is_true(const char *s)
Definition: strings.c:392
#define ONLINESTATUS
Definition: util.h:37
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:337
char * name
Definition: pcmk_fence.c:31
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:654
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__, 1, 2)))
#define XPATH_MEMBER_NODE_CONFIG
Definition: membership.c:1144
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:219
#define crm_info(fmt, args...)
Definition: logging.h:350
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:30
uint64_t flags
Definition: remote.c:149
gboolean crm_strcase_equal(gconstpointer a, gconstpointer b)
Definition: strings.c:563
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:275