pacemaker  2.0.2-debe490
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This source code is licensed under the GNU Lesser General Public License
5  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
6  */
7 
8 #include <crm_internal.h>
9 
10 #ifndef _GNU_SOURCE
11 # define _GNU_SOURCE
12 #endif
13 
14 #include <sys/param.h>
15 #include <sys/types.h>
16 #include <stdio.h>
17 #include <unistd.h>
18 #include <string.h>
19 #include <glib.h>
20 #include <crm/common/ipc.h>
21 #include <crm/cluster/internal.h>
22 #include <crm/msg_xml.h>
23 #include <crm/stonith-ng.h>
24 
25 #define s_if_plural(i) (((i) == 1)? "" : "s")
26 
27 /* The peer cache remembers cluster nodes that have been seen.
28  * This is managed mostly automatically by libcluster, based on
29  * cluster membership events.
30  *
31  * Because cluster nodes can have conflicting names or UUIDs,
32  * the hash table key is a uniquely generated ID.
33  */
34 GHashTable *crm_peer_cache = NULL;
35 
36 /*
37  * The remote peer cache tracks pacemaker_remote nodes. While the
38  * value has the same type as the peer cache's, it is tracked separately for
39  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
40  * so the name (which is also the UUID) is used as the hash table key; there
41  * is no equivalent of membership events, so management is not automatic; and
42  * most users of the peer cache need to exclude pacemaker_remote nodes.
43  *
44  * That said, using a single cache would be more logical and less error-prone,
45  * so it would be a good idea to merge them one day.
46  *
47  * libcluster provides two avenues for populating the cache:
48  * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
49  * while crm_remote_peer_cache_refresh() populates it via the CIB.
50  */
51 GHashTable *crm_remote_peer_cache = NULL;
52 
/* The known peer cache holds entries built from the CIB's node configuration
 * (see crm_known_peer_cache_refresh() in this file); like the peer cache, its
 * keys are generated unique IDs.
 */
53 GHashTable *crm_known_peer_cache = NULL;
54 
/* NOTE(review): neither of the next two variables is read or written in this
 * file; presumably they are maintained by the cluster layer -- confirm.
 */
55 unsigned long long crm_peer_seq = 0;
56 gboolean crm_have_quorum = FALSE;
/* Whether peers that leave the membership are purged from the peer cache
 * automatically; changed via crm_set_autoreap().
 */
57 static gboolean crm_autoreap = TRUE;
58 
59 int
61 {
62  if (crm_remote_peer_cache == NULL) {
63  return 0;
64  }
65  return g_hash_table_size(crm_remote_peer_cache);
66 }
67 
79 crm_node_t *
80 crm_remote_peer_get(const char *node_name)
81 {
82  crm_node_t *node;
83 
84  if (node_name == NULL) {
85  errno = -EINVAL;
86  return NULL;
87  }
88 
89  /* Return existing cache entry if one exists */
90  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
91  if (node) {
92  return node;
93  }
94 
95  /* Allocate a new entry */
96  node = calloc(1, sizeof(crm_node_t));
97  if (node == NULL) {
98  return NULL;
99  }
100 
101  /* Populate the essential information */
102  node->flags = crm_remote_node;
103  node->uuid = strdup(node_name);
104  if (node->uuid == NULL) {
105  free(node);
106  errno = -ENOMEM;
107  return NULL;
108  }
109 
110  /* Add the new entry to the cache */
111  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
112  crm_trace("added %s to remote cache", node_name);
113 
114  /* Update the entry's uname, ensuring peer status callbacks are called */
115  crm_update_peer_uname(node, node_name);
116  return node;
117 }
118 
119 void
120 crm_remote_peer_cache_remove(const char *node_name)
121 {
122  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
123  crm_trace("removed %s from remote peer cache", node_name);
124  }
125 }
126 
138 static const char *
139 remote_state_from_cib(xmlNode *node_state)
140 {
141  const char *status;
142 
143  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
144  if (status && !crm_is_true(status)) {
145  status = CRM_NODE_LOST;
146  } else {
147  status = CRM_NODE_MEMBER;
148  }
149  return status;
150 }
151 
152 /* user data for looping through remote node xpath searches */
153 struct refresh_data {
154  const char *field; /* XML attribute to check for node name */
155  gboolean has_state; /* whether to update node state based on XML */
156 };
157 
165 static void
166 remote_cache_refresh_helper(xmlNode *result, void *user_data)
167 {
168  struct refresh_data *data = user_data;
169  const char *remote = crm_element_value(result, data->field);
170  const char *state = NULL;
171  crm_node_t *node;
172 
173  CRM_CHECK(remote != NULL, return);
174 
175  /* Determine node's state, if the result has it */
176  if (data->has_state) {
177  state = remote_state_from_cib(result);
178  }
179 
180  /* Check whether cache already has entry for node */
181  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
182 
183  if (node == NULL) {
184  /* Node is not in cache, so add a new entry for it */
185  node = crm_remote_peer_get(remote);
186  CRM_ASSERT(node);
187  if (state) {
188  crm_update_peer_state(__FUNCTION__, node, state, 0);
189  }
190 
191  } else if (is_set(node->flags, crm_node_dirty)) {
192  /* Node is in cache and hasn't been updated already, so mark it clean */
194  if (state) {
195  crm_update_peer_state(__FUNCTION__, node, state, 0);
196  }
197  }
198 }
199 
200 static void
201 mark_dirty(gpointer key, gpointer value, gpointer user_data)
202 {
203  set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
204 }
205 
206 static gboolean
207 is_dirty(gpointer key, gpointer value, gpointer user_data)
208 {
209  return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
210 }
211 
212 /* search string to find CIB resources entries for guest nodes */
213 #define XPATH_GUEST_NODE_CONFIG \
214  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
215  "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
216  "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
217 
218 /* search string to find CIB resources entries for remote nodes */
219 #define XPATH_REMOTE_NODE_CONFIG \
220  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
221  "[@type='remote'][@provider='pacemaker']"
222 
223 /* search string to find CIB node status entries for pacemaker_remote nodes */
224 #define XPATH_REMOTE_NODE_STATUS \
225  "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
226  "[@" XML_NODE_IS_REMOTE "='true']"
227 
233 void
235 {
236  struct refresh_data data;
237 
238  crm_peer_init();
239 
240  /* First, we mark all existing cache entries as dirty,
241  * so that later we can remove any that weren't in the CIB.
242  * We don't empty the cache, because we need to detect changes in state.
243  */
244  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
245 
246  /* Look for guest nodes and remote nodes in the status section */
247  data.field = "id";
248  data.has_state = TRUE;
250  remote_cache_refresh_helper, &data);
251 
252  /* Look for guest nodes and remote nodes in the configuration section,
253  * because they may have just been added and not have a status entry yet.
254  * In that case, the cached node state will be left NULL, so that the
255  * peer status callback isn't called until we're sure the node started
256  * successfully.
257  */
258  data.field = "value";
259  data.has_state = FALSE;
261  remote_cache_refresh_helper, &data);
262  data.field = "id";
263  data.has_state = FALSE;
265  remote_cache_refresh_helper, &data);
266 
267  /* Remove all old cache entries that weren't seen in the CIB */
268  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
269 }
270 
271 gboolean
273 {
274  if(node == NULL) {
275  return FALSE;
276  }
277 
278  if (is_set(node->flags, crm_remote_node)) {
279  /* remote nodes are never considered active members. This
280  * guarantees they will never be considered for DC membership.*/
281  return FALSE;
282  }
283 #if SUPPORT_COROSYNC
284  if (is_corosync_cluster()) {
285  return crm_is_corosync_peer_active(node);
286  }
287 #endif
288  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
289  return FALSE;
290 }
291 
292 static gboolean
293 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
294 {
295  crm_node_t *node = value;
296  crm_node_t *search = user_data;
297 
298  if (search == NULL) {
299  return FALSE;
300 
301  } else if (search->id && node->id != search->id) {
302  return FALSE;
303 
304  } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
305  return FALSE;
306 
307  } else if (crm_is_peer_active(value) == FALSE) {
308  crm_info("Removing node with name %s and id %u from membership cache",
309  (node->uname? node->uname : "unknown"), node->id);
310  return TRUE;
311  }
312  return FALSE;
313 }
314 
323 guint
324 reap_crm_member(uint32_t id, const char *name)
325 {
326  int matches = 0;
327  crm_node_t search;
328 
329  if (crm_peer_cache == NULL) {
330  crm_trace("Membership cache not initialized, ignoring purge request");
331  return 0;
332  }
333 
334  search.id = id;
335  search.uname = name ? strdup(name) : NULL;
336  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
337  if(matches) {
338  crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
339  matches, s_if_plural(matches), search.id,
340  (search.uname? " and/or uname=" : ""),
341  (search.uname? search.uname : ""));
342 
343  } else {
344  crm_info("No peers with id=%u%s%s to purge from the membership cache",
345  search.id, (search.uname? " and/or uname=" : ""),
346  (search.uname? search.uname : ""));
347  }
348 
349  free(search.uname);
350  return matches;
351 }
352 
353 static void
354 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
355 {
356  guint *count = user_data;
357  crm_node_t *node = value;
358 
359  if (crm_is_peer_active(node)) {
360  *count = *count + 1;
361  }
362 }
363 
364 guint
366 {
367  guint count = 0;
368 
369  if (crm_peer_cache) {
370  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
371  }
372  return count;
373 }
374 
375 static void
376 destroy_crm_node(gpointer data)
377 {
378  crm_node_t *node = data;
379 
380  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
381 
382  free(node->uname);
383  free(node->state);
384  free(node->uuid);
385  free(node->expected);
386  free(node);
387 }
388 
389 void
391 {
392  if (crm_peer_cache == NULL) {
393  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
394  }
395 
396  if (crm_remote_peer_cache == NULL) {
397  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
398  }
399 
400  if (crm_known_peer_cache == NULL) {
401  crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
402  }
403 }
404 
405 void
407 {
408  if (crm_peer_cache != NULL) {
409  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
410  g_hash_table_destroy(crm_peer_cache);
411  crm_peer_cache = NULL;
412  }
413 
414  if (crm_remote_peer_cache != NULL) {
415  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
416  g_hash_table_destroy(crm_remote_peer_cache);
417  crm_remote_peer_cache = NULL;
418  }
419 
420  if (crm_known_peer_cache != NULL) {
421  crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
422  g_hash_table_destroy(crm_known_peer_cache);
423  crm_known_peer_cache = NULL;
424  }
425 
426 }
427 
428 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
429 
440 void
441 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
442 {
443  crm_status_callback = dispatch;
444 }
445 
457 void
458 crm_set_autoreap(gboolean autoreap)
459 {
460  crm_autoreap = autoreap;
461 }
462 
463 static void crm_dump_peer_hash(int level, const char *caller)
464 {
465  GHashTableIter iter;
466  const char *id = NULL;
467  crm_node_t *node = NULL;
468 
469  g_hash_table_iter_init(&iter, crm_peer_cache);
470  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
471  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
472  }
473 }
474 
475 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
476 {
477  if(value == user_data) {
478  return TRUE;
479  }
480  return FALSE;
481 }
482 
483 crm_node_t *
484 crm_find_peer_full(unsigned int id, const char *uname, int flags)
485 {
486  crm_node_t *node = NULL;
487 
488  CRM_ASSERT(id > 0 || uname != NULL);
489 
490  crm_peer_init();
491 
492  if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
493  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
494  }
495 
496  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
497  node = crm_find_peer(id, uname);
498  }
499  return node;
500 }
501 
502 crm_node_t *
503 crm_get_peer_full(unsigned int id, const char *uname, int flags)
504 {
505  crm_node_t *node = NULL;
506 
507  CRM_ASSERT(id > 0 || uname != NULL);
508 
509  crm_peer_init();
510 
511  if (flags & CRM_GET_PEER_REMOTE) {
512  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
513  }
514 
515  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
516  node = crm_get_peer(id, uname);
517  }
518  return node;
519 }
520 
521 crm_node_t *
522 crm_find_peer(unsigned int id, const char *uname)
523 {
524  GHashTableIter iter;
525  crm_node_t *node = NULL;
526  crm_node_t *by_id = NULL;
527  crm_node_t *by_name = NULL;
528 
529  CRM_ASSERT(id > 0 || uname != NULL);
530 
531  crm_peer_init();
532 
533  if (uname != NULL) {
534  g_hash_table_iter_init(&iter, crm_peer_cache);
535  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
536  if(node->uname && strcasecmp(node->uname, uname) == 0) {
537  crm_trace("Name match: %s = %p", node->uname, node);
538  by_name = node;
539  break;
540  }
541  }
542  }
543 
544  if (id > 0) {
545  g_hash_table_iter_init(&iter, crm_peer_cache);
546  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
547  if(node->id == id) {
548  crm_trace("ID match: %u = %p", node->id, node);
549  by_id = node;
550  break;
551  }
552  }
553  }
554 
555  node = by_id; /* Good default */
556  if(by_id == by_name) {
557  /* Nothing to do if they match (both NULL counts) */
558  crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
559 
560  } else if(by_id == NULL && by_name) {
561  crm_trace("Only one: %p for %u/%s", by_name, id, uname);
562 
563  if(id && by_name->id) {
564  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
565  crm_crit("Node %u and %u share the same name '%s'",
566  id, by_name->id, uname);
567  node = NULL; /* Create a new one */
568 
569  } else {
570  node = by_name;
571  }
572 
573  } else if(by_name == NULL && by_id) {
574  crm_trace("Only one: %p for %u/%s", by_id, id, uname);
575 
576  if(uname && by_id->uname) {
577  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
578  crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
579  uname, by_id->uname, id, uname);
580  }
581 
582  } else if(uname && by_id->uname) {
583  if(safe_str_eq(uname, by_id->uname)) {
584  crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
585  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
586 
587  } else {
588  crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
589  crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
590  crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
591  }
592 
593  } else if(id && by_name->id) {
594  crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
595 
596  } else {
597  /* Simple merge */
598 
599  /* Only corosync-based clusters use node IDs. The functions that call
600  * crm_update_peer_state() and crm_update_peer_proc() only know nodeid,
601  * so 'by_id' is authoritative when merging.
602  */
603  crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
604 
605  crm_info("Merging %p into %p", by_name, by_id);
606  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
607  }
608 
609  return node;
610 }
611 
612 #if SUPPORT_COROSYNC
613 static guint
614 crm_remove_conflicting_peer(crm_node_t *node)
615 {
616  int matches = 0;
617  GHashTableIter iter;
618  crm_node_t *existing_node = NULL;
619 
620  if (node->id == 0 || node->uname == NULL) {
621  return 0;
622  }
623 
624  if (corosync_cmap_has_config("nodelist") != 0) {
625  return 0;
626  }
627 
628  g_hash_table_iter_init(&iter, crm_peer_cache);
629  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
630  if (existing_node->id > 0
631  && existing_node->id != node->id
632  && existing_node->uname != NULL
633  && strcasecmp(existing_node->uname, node->uname) == 0) {
634 
635  if (crm_is_peer_active(existing_node)) {
636  continue;
637  }
638 
639  crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
640  existing_node->id, existing_node->uname, node->id);
641 
642  g_hash_table_iter_remove(&iter);
643  matches++;
644  }
645  }
646 
647  return matches;
648 }
649 #endif
650 
651 /* coverity[-alloc] Memory is referenced in one or both hashtables */
652 crm_node_t *
653 crm_get_peer(unsigned int id, const char *uname)
654 {
655  crm_node_t *node = NULL;
656  char *uname_lookup = NULL;
657 
658  CRM_ASSERT(id > 0 || uname != NULL);
659 
660  crm_peer_init();
661 
662  node = crm_find_peer(id, uname);
663 
664  /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
665  * we need to do a lookup of the node name using the id in the cluster membership. */
666  if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
667  uname_lookup = get_node_name(id);
668  }
669 
670  if (uname_lookup) {
671  uname = uname_lookup;
672  crm_trace("Inferred a name of '%s' for node %u", uname, id);
673 
674  /* try to turn up the node one more time now that we know the uname. */
675  if (node == NULL) {
676  node = crm_find_peer(id, uname);
677  }
678  }
679 
680 
681  if (node == NULL) {
682  char *uniqueid = crm_generate_uuid();
683 
684  node = calloc(1, sizeof(crm_node_t));
685  CRM_ASSERT(node);
686 
687  crm_info("Created entry %s/%p for node %s/%u (%d total)",
688  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
689  g_hash_table_replace(crm_peer_cache, uniqueid, node);
690  }
691 
692  if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
693  crm_info("Node %u is now known as %s", id, uname);
694  }
695 
696  if(id > 0 && node->id == 0) {
697  node->id = id;
698  }
699 
700  if (uname && (node->uname == NULL)) {
701  crm_update_peer_uname(node, uname);
702  }
703 
704  if(node->uuid == NULL) {
705  const char *uuid = crm_peer_uuid(node);
706 
707  if (uuid) {
708  crm_info("Node %u has uuid %s", id, uuid);
709 
710  } else {
711  crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
712  }
713  }
714 
715  free(uname_lookup);
716 
717  return node;
718 }
719 
731 void
732 crm_update_peer_uname(crm_node_t *node, const char *uname)
733 {
734  CRM_CHECK(uname != NULL,
735  crm_err("Bug: can't update node name without name"); return);
736  CRM_CHECK(node != NULL,
737  crm_err("Bug: can't update node name to %s without node", uname);
738  return);
739 
740  if (safe_str_eq(uname, node->uname)) {
741  crm_debug("Node uname '%s' did not change", uname);
742  return;
743  }
744 
745  for (const char *c = uname; *c; ++c) {
746  if ((*c >= 'A') && (*c <= 'Z')) {
747  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
748  uname);
749  break;
750  }
751  }
752 
753  free(node->uname);
754  node->uname = strdup(uname);
755  CRM_ASSERT(node->uname != NULL);
756 
757  if (crm_status_callback) {
759  }
760 
761 #if SUPPORT_COROSYNC
762  if (is_corosync_cluster() && !is_set(node->flags, crm_remote_node)) {
763  crm_remove_conflicting_peer(node);
764  }
765 #endif
766 }
767 
784 crm_node_t *
785 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
786 {
787  uint32_t last = 0;
788  gboolean changed = FALSE;
789 
790  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
791  source, peer2text(flag), status); return NULL);
792 
793  /* Pacemaker doesn't spawn processes on remote nodes */
794  if (is_set(node->flags, crm_remote_node)) {
795  return node;
796  }
797 
798  last = node->processes;
799  if (status == NULL) {
800  node->processes = flag;
801  if (node->processes != last) {
802  changed = TRUE;
803  }
804 
805  } else if (safe_str_eq(status, ONLINESTATUS)) {
806  if ((node->processes & flag) != flag) {
807  set_bit(node->processes, flag);
808  changed = TRUE;
809  }
810 
811  } else if (node->processes & flag) {
812  clear_bit(node->processes, flag);
813  changed = TRUE;
814  }
815 
816  if (changed) {
817  if (status == NULL && flag <= crm_proc_none) {
818  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
819  node->id);
820  } else {
821  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
822  peer2text(flag), status);
823  }
824 
825  /* Call the client callback first, then update the peer state,
826  * in case the node will be reaped
827  */
828  if (crm_status_callback) {
830  }
831 
832  /* The client callback shouldn't touch the peer caches,
833  * but as a safety net, bail if the peer cache was destroyed.
834  */
835  if (crm_peer_cache == NULL) {
836  return NULL;
837  }
838 
839  if (crm_autoreap) {
840  node = crm_update_peer_state(__FUNCTION__, node,
841  is_set(node->processes, crm_get_cluster_proc())?
843  }
844  } else {
845  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
846  peer2text(flag), status);
847  }
848  return node;
849 }
850 
851 void
852 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
853 {
854  char *last = NULL;
855  gboolean changed = FALSE;
856 
857  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
858  return);
859 
860  /* Remote nodes don't participate in joins */
861  if (is_set(node->flags, crm_remote_node)) {
862  return;
863  }
864 
865  last = node->expected;
866  if (expected != NULL && safe_str_neq(node->expected, expected)) {
867  node->expected = strdup(expected);
868  changed = TRUE;
869  }
870 
871  if (changed) {
872  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
873  expected, last);
874  free(last);
875  } else {
876  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
877  node->id, expected);
878  }
879 }
880 
897 static crm_node_t *
898 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
899 {
900  gboolean is_member;
901 
902  CRM_CHECK(node != NULL,
903  crm_err("Could not set state for unknown host to %s"
904  CRM_XS " source=%s", state, source);
905  return NULL);
906 
907  is_member = safe_str_eq(state, CRM_NODE_MEMBER);
908  if (is_member) {
909  node->when_lost = 0;
910  if (membership) {
911  node->last_seen = membership;
912  }
913  }
914 
915  if (state && safe_str_neq(node->state, state)) {
916  char *last = node->state;
917 
918  node->state = strdup(state);
919  crm_notice("Node %s state is now %s " CRM_XS
920  " nodeid=%u previous=%s source=%s", node->uname, state,
921  node->id, (last? last : "unknown"), source);
922  if (crm_status_callback) {
924  }
925  free(last);
926 
927  if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
928  /* We only autoreap from the peer cache, not the remote peer cache,
929  * because the latter should be managed only by
930  * crm_remote_peer_cache_refresh().
931  */
932  if(iter) {
933  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
934  g_hash_table_iter_remove(iter);
935 
936  } else {
937  reap_crm_member(node->id, node->uname);
938  }
939  node = NULL;
940  }
941 
942  } else {
943  crm_trace("Node %s state is unchanged (%s) " CRM_XS
944  " nodeid=%u source=%s", node->uname, state, node->id, source);
945  }
946  return node;
947 }
948 
964 crm_node_t *
965 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
966 {
967  return crm_update_peer_state_iter(source, node, state, membership, NULL);
968 }
969 
976 void
977 crm_reap_unseen_nodes(uint64_t membership)
978 {
979  GHashTableIter iter;
980  crm_node_t *node = NULL;
981 
982  crm_trace("Reaping unseen nodes...");
983  g_hash_table_iter_init(&iter, crm_peer_cache);
984  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
985  if (node->last_seen != membership) {
986  if (node->state) {
987  /*
988  * Calling crm_update_peer_state_iter() allows us to
989  * remove the node from crm_peer_cache without
990  * invalidating our iterator
991  */
992  crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
993 
994  } else {
995  crm_info("State of node %s[%u] is still unknown",
996  node->uname, node->id);
997  }
998  }
999  }
1000 }
1001 
1002 int
1003 crm_terminate_member(int nodeid, const char *uname, void *unused)
1004 {
1005  /* Always use the synchronous, non-mainloop version */
1006  return stonith_api_kick(nodeid, uname, 120, TRUE);
1007 }
1008 
1009 int
1010 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1011 {
1012  return stonith_api_kick(nodeid, uname, 120, TRUE);
1013 }
1014 
1015 static crm_node_t *
1016 crm_find_known_peer(const char *id, const char *uname)
1017 {
1018  GHashTableIter iter;
1019  crm_node_t *node = NULL;
1020  crm_node_t *by_id = NULL;
1021  crm_node_t *by_name = NULL;
1022 
1023  if (uname) {
1024  g_hash_table_iter_init(&iter, crm_known_peer_cache);
1025  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1026  if (node->uname && strcasecmp(node->uname, uname) == 0) {
1027  crm_trace("Name match: %s = %p", node->uname, node);
1028  by_name = node;
1029  break;
1030  }
1031  }
1032  }
1033 
1034  if (id) {
1035  g_hash_table_iter_init(&iter, crm_known_peer_cache);
1036  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1037  if(strcasecmp(node->uuid, id) == 0) {
1038  crm_trace("ID match: %s= %p", id, node);
1039  by_id = node;
1040  break;
1041  }
1042  }
1043  }
1044 
1045  node = by_id; /* Good default */
1046  if (by_id == by_name) {
1047  /* Nothing to do if they match (both NULL counts) */
1048  crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1049 
1050  } else if (by_id == NULL && by_name) {
1051  crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1052 
1053  if (id) {
1054  node = NULL;
1055 
1056  } else {
1057  node = by_name;
1058  }
1059 
1060  } else if (by_name == NULL && by_id) {
1061  crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1062 
1063  if (uname) {
1064  node = NULL;
1065  }
1066 
1067  } else if (uname && by_id->uname
1068  && safe_str_eq(uname, by_id->uname)) {
1069  /* Multiple nodes have the same uname in the CIB.
1070  * Return by_id. */
1071 
1072  } else if (id && by_name->uuid
1073  && safe_str_eq(id, by_name->uuid)) {
1074  /* Multiple nodes have the same id in the CIB.
1075  * Return by_name. */
1076  node = by_name;
1077 
1078  } else {
1079  node = NULL;
1080  }
1081 
1082  if (node == NULL) {
1083  crm_debug("Couldn't find node%s%s%s%s",
1084  id? " " : "",
1085  id? id : "",
1086  uname? " with name " : "",
1087  uname? uname : "");
1088  }
1089 
1090  return node;
1091 }
1092 
1093 static void
1094 known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1095 {
1096  const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1097  const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1098  crm_node_t * node = NULL;
1099 
1100  CRM_CHECK(id != NULL && uname !=NULL, return);
1101  node = crm_find_known_peer(id, uname);
1102 
1103  if (node == NULL) {
1104  char *uniqueid = crm_generate_uuid();
1105 
1106  node = calloc(1, sizeof(crm_node_t));
1107  CRM_ASSERT(node != NULL);
1108 
1109  node->uname = strdup(uname);
1110  CRM_ASSERT(node->uname != NULL);
1111 
1112  node->uuid = strdup(id);
1113  CRM_ASSERT(node->uuid != NULL);
1114 
1115  g_hash_table_replace(crm_known_peer_cache, uniqueid, node);
1116 
1117  } else if (is_set(node->flags, crm_node_dirty)) {
1118  if (safe_str_neq(uname, node->uname)) {
1119  free(node->uname);
1120  node->uname = strdup(uname);
1121  CRM_ASSERT(node->uname != NULL);
1122  }
1123 
1124  /* Node is in cache and hasn't been updated already, so mark it clean */
1125  clear_bit(node->flags, crm_node_dirty);
1126  }
1127 
1128 }
1129 
1130 #define XPATH_MEMBER_NODE_CONFIG \
1131  "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
1132  "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
1133 
1134 static void
1135 crm_known_peer_cache_refresh(xmlNode *cib)
1136 {
1137  crm_peer_init();
1138 
1139  g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
1140 
1142  known_peer_cache_refresh_helper, NULL);
1143 
1144  /* Remove all old cache entries that weren't seen in the CIB */
1145  g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
1146 }
1147 
1148 void
1150 {
1152  crm_known_peer_cache_refresh(cib);
1153 }
1154 
1155 crm_node_t *
1156 crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
1157 {
1158  crm_node_t *node = NULL;
1159  char *id_str = NULL;
1160 
1161  CRM_ASSERT(id > 0 || uname != NULL);
1162 
1163  node = crm_find_peer_full(id, uname, flags);
1164 
1165  if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1166  return node;
1167  }
1168 
1169  if (id > 0) {
1170  id_str = crm_strdup_printf("%u", id);
1171  }
1172 
1173  node = crm_find_known_peer(id_str, uname);
1174 
1175  free(id_str);
1176  return node;
1177 }
void crm_peer_destroy(void)
Definition: membership.c:406
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:156
char uname[MAX_NAME]
Definition: internal.h:87
#define crm_notice(fmt, args...)
Definition: logging.h:242
#define CRM_NODE_LOST
Definition: cluster.h:30
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:224
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:441
#define crm_crit(fmt, args...)
Definition: logging.h:239
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:157
char * crm_generate_uuid(void)
Definition: utils.c:1078
uint64_t flags
Definition: cluster.h:57
void crm_reap_unseen_nodes(uint64_t ring_id)
Definition: membership.c:977
uint32_t id
Definition: cluster.h:62
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:272
gboolean crm_have_quorum
Definition: membership.c:56
char * uuid
Definition: cluster.h:55
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2123
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1003
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:458
void crm_peer_init(void)
Definition: membership.c:390
GHashTable * crm_remote_peer_cache
Definition: membership.c:51
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:653
guint crm_active_peers(void)
Definition: membership.c:365
#define clear_bit(word, bit)
Definition: crm_internal.h:168
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:130
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:503
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:512
uint32_t id
Definition: internal.h:82
#define crm_warn(fmt, args...)
Definition: logging.h:241
#define set_bit(word, bit)
Definition: crm_internal.h:167
uint32_t processes
Definition: cluster.h:59
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:120
#define crm_debug(fmt, args...)
Definition: logging.h:245
#define XML_ATTR_ID
Definition: msg_xml.h:96
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:423
time_t when_lost
Definition: cluster.h:63
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:213
crm_status_type
Definition: cluster.h:163
#define crm_trace(fmt, args...)
Definition: logging.h:246
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:121
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *peer, uint32_t flag, const char *status)
Definition: membership.c:785
guint crm_strcase_hash(gconstpointer v)
Definition: strings.c:376
#define XML_ATTR_UNAME
Definition: msg_xml.h:118
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:692
#define CRM_NODE_MEMBER
Definition: cluster.h:31
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:428
unsigned long long crm_peer_seq
Definition: membership.c:55
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:324
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:234
char * expected
Definition: cluster.h:67
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:852
gboolean is_corosync_cluster(void)
Definition: cluster.c:328
crm_node_t * crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:1156
#define CRM_XS
Definition: logging.h:34
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
Update a node's state and membership information.
Definition: membership.c:965
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:80
#define crm_err(fmt, args...)
Definition: logging.h:240
#define CRM_ASSERT(expr)
Definition: results.h:42
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:484
Fencing aka. STONITH.
char data[0]
Definition: internal.h:92
char * state
Definition: cluster.h:56
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:179
Wrappers for and extensions to libqb IPC.
GHashTable * crm_known_peer_cache
Definition: membership.c:53
char * uname
Definition: cluster.h:54
uint64_t last_seen
Definition: cluster.h:58
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:240
gboolean crm_is_true(const char *s)
Definition: strings.c:172
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:732
#define safe_str_eq(a, b)
Definition: util.h:59
#define ONLINESTATUS
Definition: util.h:36
void crm_peer_caches_refresh(xmlNode *cib)
Definition: membership.c:1149
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:625
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1010
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:522
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__, 1, 2)))
#define XPATH_MEMBER_NODE_CONFIG
Definition: membership.c:1130
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:234
GHashTable * crm_peer_cache
Definition: membership.c:34
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:219
#define crm_info(fmt, args...)
Definition: logging.h:243
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:28
uint64_t flags
Definition: remote.c:148
#define s_if_plural(i)
Definition: membership.c:25
gboolean crm_strcase_equal(gconstpointer a, gconstpointer b)
Definition: strings.c:370
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:273
int crm_remote_peer_cache_size(void)
Definition: membership.c:60