pacemaker  1.1.18-7fdfbbe
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include <crm_internal.h>
19 
20 #ifndef _GNU_SOURCE
21 # define _GNU_SOURCE
22 #endif
23 
24 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/msg_xml.h>
33 #include <crm/stonith-ng.h>
34 
/* Plural suffix for log messages: "" when the count is exactly 1, otherwise "s" */
#define s_if_plural(i) (((i) != 1) ? "s" : "")
36 
37 /* The peer cache remembers cluster nodes that have been seen.
38  * This is managed mostly automatically by libcluster, based on
39  * cluster membership events.
40  *
41  * Because cluster nodes can have conflicting names or UUIDs,
42  * the hash table key is a uniquely generated ID.
43  */
44 GHashTable *crm_peer_cache = NULL;
45 
46 /*
47  * The remote peer cache tracks pacemaker_remote nodes. While the
48  * value has the same type as the peer cache's, it is tracked separately for
49  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
50  * so the name (which is also the UUID) is used as the hash table key; there
51  * is no equivalent of membership events, so management is not automatic; and
52  * most users of the peer cache need to exclude pacemaker_remote nodes.
53  *
54  * That said, using a single cache would be more logical and less error-prone,
55  * so it would be a good idea to merge them one day.
56  *
57  * libcluster provides two avenues for populating the cache:
58  * crm_remote_peer_get(), crm_remote_peer_cache_add() and
59  * crm_remote_peer_cache_remove() directly manage it,
60  * while crm_remote_peer_cache_refresh() populates it via the CIB.
61  */
62 GHashTable *crm_remote_peer_cache = NULL;
63 
/* NOTE(review): not referenced elsewhere in this listing; presumably the
 * most recent cluster membership sequence number -- confirm against callers.
 */
64 unsigned long long crm_peer_seq = 0;
/* NOTE(review): not referenced elsewhere in this listing; presumably whether
 * the local partition currently has quorum -- confirm against callers.
 */
65 gboolean crm_have_quorum = FALSE;
/* Whether nodes that stop being members are purged from the peer cache
 * automatically; changed via crm_set_autoreap().
 */
66 static gboolean crm_autoreap = TRUE;
67 
68 int
70 {
71  if (crm_remote_peer_cache == NULL) {
72  return 0;
73  }
74  return g_hash_table_size(crm_remote_peer_cache);
75 }
76 
88 crm_node_t *
89 crm_remote_peer_get(const char *node_name)
90 {
91  crm_node_t *node;
92 
93  if (node_name == NULL) {
94  errno = -EINVAL;
95  return NULL;
96  }
97 
98  /* Return existing cache entry if one exists */
99  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
100  if (node) {
101  return node;
102  }
103 
104  /* Allocate a new entry */
105  node = calloc(1, sizeof(crm_node_t));
106  if (node == NULL) {
107  return NULL;
108  }
109 
110  /* Populate the essential information */
111  node->flags = crm_remote_node;
112  node->uuid = strdup(node_name);
113  if (node->uuid == NULL) {
114  free(node);
115  errno = -ENOMEM;
116  return NULL;
117  }
118 
119  /* Add the new entry to the cache */
120  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
121  crm_trace("added %s to remote cache", node_name);
122 
123  /* Update the entry's uname, ensuring peer status callbacks are called */
124  crm_update_peer_uname(node, node_name);
125  return node;
126 }
127 
136 void
137 crm_remote_peer_cache_add(const char *node_name)
138 {
139  CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
140 }
141 
142 void
143 crm_remote_peer_cache_remove(const char *node_name)
144 {
145  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
146  crm_trace("removed %s from remote peer cache", node_name);
147  }
148 }
149 
161 static const char *
162 remote_state_from_cib(xmlNode *node_state)
163 {
164  const char *status;
165 
166  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
167  if (status && !crm_is_true(status)) {
168  status = CRM_NODE_LOST;
169  } else {
170  status = CRM_NODE_MEMBER;
171  }
172  return status;
173 }
174 
175 /* user data for looping through remote node xpath searches */
176 struct refresh_data {
177  const char *field; /* XML attribute to check for node name */
178  gboolean has_state; /* whether to update node state based on XML */
179 };
180 
188 static void
189 remote_cache_refresh_helper(xmlNode *result, void *user_data)
190 {
191  struct refresh_data *data = user_data;
192  const char *remote = crm_element_value(result, data->field);
193  const char *state = NULL;
194  crm_node_t *node;
195 
196  CRM_CHECK(remote != NULL, return);
197 
198  /* Determine node's state, if the result has it */
199  if (data->has_state) {
200  state = remote_state_from_cib(result);
201  }
202 
203  /* Check whether cache already has entry for node */
204  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
205 
206  if (node == NULL) {
207  /* Node is not in cache, so add a new entry for it */
208  node = crm_remote_peer_get(remote);
209  CRM_ASSERT(node);
210  if (state) {
211  crm_update_peer_state(__FUNCTION__, node, state, 0);
212  }
213 
214  } else if (is_set(node->flags, crm_node_dirty)) {
215  /* Node is in cache and hasn't been updated already, so mark it clean */
217  if (state) {
218  crm_update_peer_state(__FUNCTION__, node, state, 0);
219  }
220  }
221 }
222 
223 static void
224 mark_dirty(gpointer key, gpointer value, gpointer user_data)
225 {
226  set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
227 }
228 
229 static gboolean
230 is_dirty(gpointer key, gpointer value, gpointer user_data)
231 {
232  return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
233 }
234 
/* xpath matching the resource meta-attribute nvpairs that define guest nodes */
#define XPATH_GUEST_NODE_CONFIG                                 \
    "//" XML_TAG_CIB                                            \
    "//" XML_CIB_TAG_CONFIGURATION                              \
    "//" XML_CIB_TAG_RESOURCE                                   \
    "//" XML_TAG_META_SETS                                      \
    "//" XML_CIB_TAG_NVPAIR                                     \
    "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"

/* xpath matching the configured resources that define remote nodes */
#define XPATH_REMOTE_NODE_CONFIG                                \
    "//" XML_TAG_CIB                                            \
    "//" XML_CIB_TAG_CONFIGURATION                              \
    "//" XML_CIB_TAG_RESOURCE                                   \
    "[@type='remote'][@provider='pacemaker']"

/* xpath matching the node status entries of pacemaker_remote nodes */
#define XPATH_REMOTE_NODE_STATUS                                \
    "//" XML_TAG_CIB                                            \
    "//" XML_CIB_TAG_STATUS                                     \
    "//" XML_CIB_TAG_STATE                                      \
    "[@" XML_NODE_IS_REMOTE "='true']"
250 
256 void
258 {
259  struct refresh_data data;
260 
261  crm_peer_init();
262 
263  /* First, we mark all existing cache entries as dirty,
264  * so that later we can remove any that weren't in the CIB.
265  * We don't empty the cache, because we need to detect changes in state.
266  */
267  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
268 
269  /* Look for guest nodes and remote nodes in the status section */
270  data.field = "id";
271  data.has_state = TRUE;
273  remote_cache_refresh_helper, &data);
274 
275  /* Look for guest nodes and remote nodes in the configuration section,
276  * because they may have just been added and not have a status entry yet.
277  * In that case, the cached node state will be left NULL, so that the
278  * peer status callback isn't called until we're sure the node started
279  * successfully.
280  */
281  data.field = "value";
282  data.has_state = FALSE;
284  remote_cache_refresh_helper, &data);
285  data.field = "id";
286  data.has_state = FALSE;
288  remote_cache_refresh_helper, &data);
289 
290  /* Remove all old cache entries that weren't seen in the CIB */
291  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
292 }
293 
294 gboolean
296 {
297  if(node == NULL) {
298  return FALSE;
299  }
300 
301  if (is_set(node->flags, crm_remote_node)) {
302  /* remote nodes are never considered active members. This
303  * guarantees they will never be considered for DC membership.*/
304  return FALSE;
305  }
306 #if SUPPORT_COROSYNC
307  if (is_openais_cluster()) {
308  return crm_is_corosync_peer_active(node);
309  }
310 #endif
311 #if SUPPORT_HEARTBEAT
312  if (is_heartbeat_cluster()) {
313  return crm_is_heartbeat_peer_active(node);
314  }
315 #endif
316  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
317  return FALSE;
318 }
319 
320 static gboolean
321 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
322 {
323  crm_node_t *node = value;
324  crm_node_t *search = user_data;
325 
326  if (search == NULL) {
327  return FALSE;
328 
329  } else if (search->id && node->id != search->id) {
330  return FALSE;
331 
332  } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
333  return FALSE;
334 
335  } else if (crm_is_peer_active(value) == FALSE) {
336  crm_info("Removing node with name %s and id %u from membership cache",
337  (node->uname? node->uname : "unknown"), node->id);
338  return TRUE;
339  }
340  return FALSE;
341 }
342 
351 guint
352 reap_crm_member(uint32_t id, const char *name)
353 {
354  int matches = 0;
355  crm_node_t search;
356 
357  if (crm_peer_cache == NULL) {
358  crm_trace("Membership cache not initialized, ignoring purge request");
359  return 0;
360  }
361 
362  search.id = id;
363  search.uname = name ? strdup(name) : NULL;
364  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
365  if(matches) {
366  crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
367  matches, s_if_plural(matches), search.id,
368  (search.uname? " and/or uname=" : ""),
369  (search.uname? search.uname : ""));
370 
371  } else {
372  crm_info("No peers with id=%u%s%s to purge from the membership cache",
373  search.id, (search.uname? " and/or uname=" : ""),
374  (search.uname? search.uname : ""));
375  }
376 
377  free(search.uname);
378  return matches;
379 }
380 
381 static void
382 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
383 {
384  guint *count = user_data;
385  crm_node_t *node = value;
386 
387  if (crm_is_peer_active(node)) {
388  *count = *count + 1;
389  }
390 }
391 
392 guint
394 {
395  guint count = 0;
396 
397  if (crm_peer_cache) {
398  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
399  }
400  return count;
401 }
402 
403 static void
404 destroy_crm_node(gpointer data)
405 {
406  crm_node_t *node = data;
407 
408  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
409 
410  free(node->addr);
411  free(node->uname);
412  free(node->state);
413  free(node->uuid);
414  free(node->expected);
415  free(node);
416 }
417 
418 void
420 {
421  if (crm_peer_cache == NULL) {
422  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
423  }
424 
425  if (crm_remote_peer_cache == NULL) {
426  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
427  }
428 }
429 
430 void
432 {
433  if (crm_peer_cache != NULL) {
434  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
435  g_hash_table_destroy(crm_peer_cache);
436  crm_peer_cache = NULL;
437  }
438 
439  if (crm_remote_peer_cache != NULL) {
440  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
441  g_hash_table_destroy(crm_remote_peer_cache);
442  crm_remote_peer_cache = NULL;
443  }
444 }
445 
/* Client function invoked when a peer's status changes; NULL when none is
 * registered. Installed via crm_set_status_callback(). On a state change it
 * is called with the kind of change, the node, and the node's previous state.
 */
446 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
447 
458 void
459 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
460 {
461  crm_status_callback = dispatch;
462 }
463 
475 void
476 crm_set_autoreap(gboolean autoreap)
477 {
478  crm_autoreap = autoreap;
479 }
480 
481 static void crm_dump_peer_hash(int level, const char *caller)
482 {
483  GHashTableIter iter;
484  const char *id = NULL;
485  crm_node_t *node = NULL;
486 
487  g_hash_table_iter_init(&iter, crm_peer_cache);
488  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
489  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
490  }
491 }
492 
493 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
494 {
495  if(value == user_data) {
496  return TRUE;
497  }
498  return FALSE;
499 }
500 
501 crm_node_t *
502 crm_find_peer_full(unsigned int id, const char *uname, int flags)
503 {
504  crm_node_t *node = NULL;
505 
506  CRM_ASSERT(id > 0 || uname != NULL);
507 
508  crm_peer_init();
509 
510  if (flags & CRM_GET_PEER_REMOTE) {
511  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
512  }
513 
514  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
515  node = crm_find_peer(id, uname);
516  }
517  return node;
518 }
519 
520 crm_node_t *
521 crm_get_peer_full(unsigned int id, const char *uname, int flags)
522 {
523  crm_node_t *node = NULL;
524 
525  CRM_ASSERT(id > 0 || uname != NULL);
526 
527  crm_peer_init();
528 
529  if (flags & CRM_GET_PEER_REMOTE) {
530  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
531  }
532 
533  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
534  node = crm_get_peer(id, uname);
535  }
536  return node;
537 }
538 
/*
 * Search the cluster peer cache for an entry matching a node ID and/or name.
 *
 * id:    cluster node ID to match (0 to skip the ID search)
 * uname: node name to match, compared case-insensitively (NULL to skip)
 *
 * At least one of the two must be given (asserted). The cache is searched
 * once by name and once by ID, and the two results are reconciled:
 *  - same entry (or both NULL): that entry is returned
 *  - name match only: returned, unless both the requested ID and the entry's
 *    ID are nonzero, in which case NULL is returned
 *  - ID match only: returned, after logging if the names disagree
 *  - two different entries: the ID match is returned, and in some cases the
 *    name match is removed from the cache
 *
 * Returns the chosen cache entry, or NULL if nothing suitable was found.
 */
539 crm_node_t *
540 crm_find_peer(unsigned int id, const char *uname)
541 {
542  GHashTableIter iter;
543  crm_node_t *node = NULL;
544  crm_node_t *by_id = NULL;
545  crm_node_t *by_name = NULL;
546 
547  CRM_ASSERT(id > 0 || uname != NULL);
548 
549  crm_peer_init();
550 
/* Pass 1: first entry whose uname matches, ignoring case */
551  if (uname != NULL) {
552  g_hash_table_iter_init(&iter, crm_peer_cache);
553  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
554  if(node->uname && strcasecmp(node->uname, uname) == 0) {
555  crm_trace("Name match: %s = %p", node->uname, node);
556  by_name = node;
557  break;
558  }
559  }
560  }
561 
/* Pass 2: first entry whose id matches */
562  if (id > 0) {
563  g_hash_table_iter_init(&iter, crm_peer_cache);
564  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
565  if(node->id == id) {
566  crm_trace("ID match: %u = %p", node->id, node);
567  by_id = node;
568  break;
569  }
570  }
571  }
572 
573  node = by_id; /* Good default */
574  if(by_id == by_name) {
575  /* Nothing to do if they match (both NULL counts) */
576  crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
577 
578  } else if(by_id == NULL && by_name) {
579  crm_trace("Only one: %p for %u/%s", by_name, id, uname);
580 
581  if(id && by_name->id) {
582  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
583  crm_crit("Node %u and %u share the same name '%s'",
584  id, by_name->id, uname);
585  node = NULL; /* Create a new one */
586 
587  } else {
588  node = by_name;
589  }
590 
591  } else if(by_name == NULL && by_id) {
592  crm_trace("Only one: %p for %u/%s", by_id, id, uname);
593 
594  if(uname && by_id->uname) {
595  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
596  crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
597  uname, by_id->uname, id, uname);
598  }
599 
/* From here on, by_id and by_name are both non-NULL and are different entries */
600  } else if(uname && by_id->uname) {
601  if(safe_str_eq(uname, by_id->uname)) {
602  crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
603  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
604 
605  } else {
606  crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
607  crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
608  crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
609  }
610 
611  } else if(id && by_name->id) {
612  crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
613 
614  } else {
615  /* Simple merge */
616 
617  /* Only corosync based clusters use nodeid's
618  *
619  * The functions that call crm_update_peer_state() only know nodeid
620  * so 'by_id' is authoritative when merging
621  *
622  * Same for crm_update_peer_proc()
623  */
624  crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
625 
626  crm_info("Merging %p into %p", by_name, by_id);
627  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
628  }
629 
630  return node;
631 }
632 
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Purge inactive cache entries whose name clashes with a node's
 *
 * Removes every peer cache entry that has a nonzero ID different from
 * \p node's, the same uname (ignoring case), and is not an active peer.
 *
 * \param[in] node  Node whose uname should be unique among cached nodes
 *
 * \return Number of cache entries removed
 */
static guint
crm_remove_conflicting_peer(crm_node_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    gpointer value = NULL;

    /* Nothing can conflict without both an ID and a name */
    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

# if !SUPPORT_PLUGIN
    if (corosync_cmap_has_config("nodelist") != 0) {
        return 0;
    }
# endif

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, &value)) {
        crm_node_t *existing_node = value;

        if (existing_node->id == 0 || existing_node->id == node->id) {
            continue;
        }
        if (existing_node->uname == NULL
            || strcasecmp(existing_node->uname, node->uname) != 0) {
            continue;
        }
        if (crm_is_peer_active(existing_node)) {
            continue;
        }

        crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                 existing_node->id, existing_node->uname, node->id);

        g_hash_table_iter_remove(&iter);
        matches++;
    }

    return matches;
}
#endif
673 
/*
 * Get the cluster peer cache entry for a node, creating it if none is found.
 *
 * id:    cluster node ID (0 if unknown)
 * uname: node name (NULL if unknown)
 *
 * At least one of the two must be given (asserted). When no name is given
 * and none is cached, get_node_name() is asked for one using the ID. A newly
 * created entry is stored under a generated unique ID; ID, uname and UUID are
 * then filled in only where the entry does not have them yet.
 *
 * Returns the (possibly new) cache entry. The entry belongs to the cache and
 * must not be freed by the caller.
 */
674 /* coverity[-alloc] Memory is referenced in one or both hashtables */
675 crm_node_t *
676 crm_get_peer(unsigned int id, const char *uname)
677 {
678  crm_node_t *node = NULL;
679  char *uname_lookup = NULL;
680 
681  CRM_ASSERT(id > 0 || uname != NULL);
682 
683  crm_peer_init();
684 
685  node = crm_find_peer(id, uname);
686 
687  /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
688  * we need to do a lookup of the node name using the id in the cluster membership. */
689  if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
690  uname_lookup = get_node_name(id);
691  }
692 
693  if (uname_lookup) {
694  uname = uname_lookup;
695  crm_trace("Inferred a name of '%s' for node %u", uname, id);
696 
697  /* try to turn up the node one more time now that we know the uname. */
698  if (node == NULL) {
699  node = crm_find_peer(id, uname);
700  }
701  }
702 
703 
704  if (node == NULL) {
/* The generated ID becomes the hash table key and is owned by the table from here on */
705  char *uniqueid = crm_generate_uuid();
706 
707  node = calloc(1, sizeof(crm_node_t));
708  CRM_ASSERT(node);
709 
710  crm_info("Created entry %s/%p for node %s/%u (%d total)",
711  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
712  g_hash_table_replace(crm_peer_cache, uniqueid, node);
713  }
714 
715  if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
716  crm_info("Node %u is now known as %s", id, uname);
717  }
718 
719  if(id > 0 && node->id == 0) {
720  node->id = id;
721  }
722 
723  if (uname && (node->uname == NULL)) {
724  crm_update_peer_uname(node, uname);
725  }
726 
727  if(node->uuid == NULL) {
/* NOTE(review): presumably crm_peer_uuid() also stores the UUID in the node -- confirm */
728  const char *uuid = crm_peer_uuid(node);
729 
730  if (uuid) {
731  crm_info("Node %u has uuid %s", id, uuid);
732 
733  } else {
734  crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
735  }
736  }
737 
/* uname may alias uname_lookup, so it must not be used after this point */
738  free(uname_lookup);
739 
740  return node;
741 }
742 
/*
 * Get (creating if needed) a cluster peer cache entry and update it.
 *
 * source:   caller's name, used in log messages
 * id:       cluster node ID; first mapped through get_corosync_id(id, uuid)
 * born:     stored in the node only in heartbeat or plugin builds, if nonzero
 * seen:     membership value passed on to crm_update_peer_state()
 * votes:    stored in the node only in plugin builds, if positive
 * children: process flags passed to crm_update_peer_proc() when nonzero
 * uuid:     node UUID; used only when the node has none and the cluster is
 *           not an openais/corosync one
 * uname:    node name
 * addr:     stored in the node only in plugin builds
 * state:    new node state, applied when non-NULL
 *
 * Returns the cache entry, or NULL if crm_update_peer_proc() or
 * crm_update_peer_state() returned NULL.
 */
754 crm_node_t *
755 crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
756  uint32_t children, const char *uuid, const char *uname, const char *addr,
757  const char *state)
758 {
759 #if SUPPORT_PLUGIN
760  gboolean addr_changed = FALSE;
761  gboolean votes_changed = FALSE;
762 #endif
763  crm_node_t *node = NULL;
764 
765  id = get_corosync_id(id, uuid);
766  node = crm_get_peer(id, uname);
767 
768  CRM_ASSERT(node != NULL);
769 
770  if (node->uuid == NULL) {
771  if (is_openais_cluster()) {
772  /* Yes, overrule whatever was passed in */
773  crm_peer_uuid(node);
774 
775  } else if (uuid != NULL) {
776  node->uuid = strdup(uuid);
777  }
778  }
779 
780  if (children > 0) {
781  if (crm_update_peer_proc(source, node, children, state) == NULL) {
782  return NULL;
783  }
784  }
785 
786  if (state != NULL) {
787  if (crm_update_peer_state(source, node, state, seen) == NULL) {
788  return NULL;
789  }
790  }
791 #if SUPPORT_HEARTBEAT
792  if (born != 0) {
793  node->born = born;
794  }
795 #endif
796 
797 #if SUPPORT_PLUGIN
798  /* These were only used by the plugin */
799  if (born != 0) {
800  node->born = born;
801  }
802 
803  if (votes > 0 && node->votes != votes) {
804  votes_changed = TRUE;
805  node->votes = votes;
806  }
807 
808  if (addr != NULL) {
809  if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
810  addr_changed = TRUE;
811  free(node->addr);
812  node->addr = strdup(addr);
813  }
814  }
815  if (addr_changed || votes_changed) {
816  crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
817  " proc=%.32x", source, node->uname, node->id, node->state,
818  node->addr, addr_changed ? " (new)" : "", node->votes,
819  votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
820  }
821 #endif
822 
823  return node;
824 }
825 
/*
 * Replace a node's uname.
 *
 * node:  node entry to update
 * uname: new name
 *
 * Both arguments must be non-NULL; otherwise an error is logged and nothing
 * changes. Nothing is done either when the name is unchanged. A warning is
 * logged for names containing the characters 'A'-'Z'. For non-remote nodes
 * in an openais/corosync cluster (SUPPORT_COROSYNC builds), inactive cache
 * entries with a conflicting uname are then removed.
 */
837 void
838 crm_update_peer_uname(crm_node_t *node, const char *uname)
839 {
840  CRM_CHECK(uname != NULL,
841  crm_err("Bug: can't update node name without name"); return);
842  CRM_CHECK(node != NULL,
843  crm_err("Bug: can't update node name to %s without node", uname);
844  return);
845 
846  if (safe_str_eq(uname, node->uname)) {
847  crm_debug("Node uname '%s' did not change", uname);
848  return;
849  }
850 
851  for (const char *c = uname; *c; ++c) {
852  if ((*c >= 'A') && (*c <= 'Z')) {
853  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
854  uname);
855  break;
856  }
857  }
858 
859  free(node->uname);
860  node->uname = strdup(uname);
861  CRM_ASSERT(node->uname != NULL);
862 
863  if (crm_status_callback) {
/* NOTE(review): listing line 864 is missing here, leaving this branch empty;
 * presumably it invoked crm_status_callback for the name change -- restore
 * it from the original source.
 */
865  }
866 
867 #if SUPPORT_COROSYNC
868  if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
869  crm_remove_conflicting_peer(node);
870  }
871 #endif
872 }
873 
/*
 * Update a node's process flags.
 *
 * source: caller's name, used in log messages
 * node:   node entry to update (NULL logs an error and returns NULL)
 * flag:   process flag(s) to set, clear or assign
 * status: NULL to assign flag as the complete process set; ONLINESTATUS to
 *         set the flag(s); any other value to clear them (in SUPPORT_PLUGIN
 *         builds CRM_NODE_MEMBER assigns a nonzero flag as the complete set)
 *
 * Remote nodes are returned unchanged.
 *
 * Returns the node, NULL if the peer cache no longer exists after the change
 * was reported, or whatever crm_update_peer_state() returns when
 * auto-reaping is enabled.
 */
890 crm_node_t *
891 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
892 {
893  uint32_t last = 0;
894  gboolean changed = FALSE;
895 
896  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
897  source, peer2text(flag), status); return NULL);
898 
899  /* Pacemaker doesn't spawn processes on remote nodes */
900  if (is_set(node->flags, crm_remote_node)) {
901  return node;
902  }
903 
904  last = node->processes;
905  if (status == NULL) {
906  node->processes = flag;
907  if (node->processes != last) {
908  changed = TRUE;
909  }
910 
911  } else if (safe_str_eq(status, ONLINESTATUS)) {
912  if ((node->processes & flag) != flag) {
913  set_bit(node->processes, flag);
914  changed = TRUE;
915  }
916 #if SUPPORT_PLUGIN
917  } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
918  if (flag > 0 && node->processes != flag) {
919  node->processes = flag;
920  changed = TRUE;
921  }
922 #endif
923 
924  } else if (node->processes & flag) {
925  clear_bit(node->processes, flag);
926  changed = TRUE;
927  }
928 
929  if (changed) {
930  if (status == NULL && flag <= crm_proc_none) {
931  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
932  node->id);
933  } else {
934  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
935  peer2text(flag), status);
936  }
937 
938  /* Call the client callback first, then update the peer state,
939  * in case the node will be reaped
940  */
941  if (crm_status_callback) {
/* NOTE(review): listing line 942 is missing here, leaving this branch empty;
 * presumably it invoked crm_status_callback -- restore it from the original
 * source.
 */
943  }
944 
945  /* The client callback shouldn't touch the peer caches,
946  * but as a safety net, bail if the peer cache was destroyed.
947  */
948  if (crm_peer_cache == NULL) {
949  return NULL;
950  }
951 
952  if (crm_autoreap) {
953  node = crm_update_peer_state(__FUNCTION__, node,
954  is_set(node->processes, crm_get_cluster_proc())?
/* NOTE(review): listing line 955 is missing here (the two result operands of
 * the conditional and the remaining arguments of the call), so this statement
 * is incomplete as shown -- restore it from the original source.
 */
956  }
957  } else {
958  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
959  peer2text(flag), status);
960  }
961  return node;
962 }
963 
964 void
965 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
966 {
967  char *last = NULL;
968  gboolean changed = FALSE;
969 
970  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
971  return);
972 
973  /* Remote nodes don't participate in joins */
974  if (is_set(node->flags, crm_remote_node)) {
975  return;
976  }
977 
978  last = node->expected;
979  if (expected != NULL && safe_str_neq(node->expected, expected)) {
980  node->expected = strdup(expected);
981  changed = TRUE;
982  }
983 
984  if (changed) {
985  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
986  expected, last);
987  free(last);
988  } else {
989  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
990  node->id, expected);
991  }
992 }
993 
/*
 * Update a node's state and membership information.
 *
 * source:     caller's name, used in log messages
 * node:       node entry to update (NULL logs an error and returns NULL)
 * state:      new state (NULL leaves the state unchanged)
 * membership: stored as the node's last_seen when nonzero and the new state
 *             is CRM_NODE_MEMBER
 * iter:       if non-NULL, an iterator over the peer cache currently
 *             positioned on this node; used to remove the node instead of
 *             calling reap_crm_member()
 *
 * On a state change the client status callback (if any) is called with the
 * previous state, which is freed afterwards. If auto-reaping is enabled and
 * a non-remote node is no longer a member, it is removed from the peer cache.
 *
 * Returns the node, or NULL if it was NULL or has been reaped.
 */
1010 static crm_node_t *
1011 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
1012 {
1013  gboolean is_member;
1014 
1015  CRM_CHECK(node != NULL,
1016  crm_err("Could not set state for unknown host to %s"
1017  CRM_XS " source=%s", state, source);
1018  return NULL);
1019 
1020  is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1021  if (membership && is_member) {
1022  node->last_seen = membership;
1023  }
1024 
1025  if (state && safe_str_neq(node->state, state)) {
1026  char *last = node->state;
1027  enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
/* NOTE(review): listing line 1028 is missing here (the two result operands of
 * the conditional), so this declaration is incomplete as shown -- restore it
 * from the original source.
 */
1029 
1030  node->state = strdup(state);
1031  crm_notice("Node %s state is now %s " CRM_XS
1032  " nodeid=%u previous=%s source=%s", node->uname, state,
1033  node->id, (last? last : "unknown"), source);
1034  if (crm_status_callback) {
1035  crm_status_callback(status_type, node, last);
1036  }
1037  free(last);
1038 
1039  if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1040  /* We only autoreap from the peer cache, not the remote peer cache,
1041  * because the latter should be managed only by
1042  * crm_remote_peer_cache_refresh().
1043  */
1044  if(iter) {
1045  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1046  g_hash_table_iter_remove(iter);
1047 
1048  } else {
1049  reap_crm_member(node->id, node->uname);
1050  }
/* The entry may have been freed by the removal above, so never hand it back */
1051  node = NULL;
1052  }
1053 
1054  } else {
1055  crm_trace("Node %s state is unchanged (%s) " CRM_XS
1056  " nodeid=%u source=%s", node->uname, state, node->id, source);
1057  }
1058  return node;
1059 }
1060 
1076 crm_node_t *
1077 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
1078 {
1079  return crm_update_peer_state_iter(source, node, state, membership, NULL);
1080 }
1081 
1088 void
1089 crm_reap_unseen_nodes(uint64_t membership)
1090 {
1091  GHashTableIter iter;
1092  crm_node_t *node = NULL;
1093 
1094  crm_trace("Reaping unseen nodes...");
1095  g_hash_table_iter_init(&iter, crm_peer_cache);
1096  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1097  if (node->last_seen != membership) {
1098  if (node->state) {
1099  /*
1100  * Calling crm_update_peer_state_iter() allows us to
1101  * remove the node from crm_peer_cache without
1102  * invalidating our iterator
1103  */
1104  crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
1105 
1106  } else {
1107  crm_info("State of node %s[%u] is still unknown",
1108  node->uname, node->id);
1109  }
1110  }
1111  }
1112 }
1113 
1114 int
1115 crm_terminate_member(int nodeid, const char *uname, void *unused)
1116 {
1117  /* Always use the synchronous, non-mainloop version */
1118  return stonith_api_kick(nodeid, uname, 120, TRUE);
1119 }
1120 
1121 int
1122 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1123 {
1124  return stonith_api_kick(nodeid, uname, 120, TRUE);
1125 }
void crm_peer_destroy(void)
Definition: membership.c:431
uint32_t votes
Definition: internal.h:50
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
#define crm_notice(fmt, args...)
Definition: logging.h:250
#define CRM_NODE_LOST
Definition: cluster.h:43
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:247
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:459
gboolean is_openais_cluster(void)
Definition: cluster.c:630
#define crm_crit(fmt, args...)
Definition: logging.h:247
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:150
char * crm_generate_uuid(void)
Definition: utils.c:1390
uint64_t flags
Definition: cluster.h:76
void crm_reap_unseen_nodes(uint64_t ring_id)
Definition: membership.c:1089
uint32_t id
Definition: cluster.h:73
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:645
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:295
gboolean crm_have_quorum
Definition: membership.c:65
uint64_t born
Definition: cluster.h:74
char * uuid
Definition: cluster.h:83
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2552
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1115
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:476
void crm_peer_init(void)
Definition: membership.c:419
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
GHashTable * crm_remote_peer_cache
Definition: membership.c:62
char * addr
Definition: cluster.h:87
void crm_remote_peer_cache_add(const char *node_name)
Add a node to the remote peer cache.
Definition: membership.c:137
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:676
guint crm_active_peers(void)
Definition: membership.c:393
#define clear_bit(word, bit)
Definition: crm_internal.h:191
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:521
crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state)
Definition: membership.c:755
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:468
char uname[MAX_NAME]
Definition: internal.h:53
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define set_bit(word, bit)
Definition: crm_internal.h:190
uint32_t processes
Definition: cluster.h:79
uint32_t id
Definition: internal.h:48
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:143
#define crm_debug(fmt, args...)
Definition: logging.h:253
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:236
crm_status_type
Definition: cluster.h:198
#define crm_trace(fmt, args...)
Definition: logging.h:254
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:129
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *peer, uint32_t flag, const char *status)
Definition: membership.c:891
guint crm_strcase_hash(gconstpointer v)
Definition: strings.c:362
const char * crm_element_value(xmlNode *data, const char *name)
Definition: xml.c:5165
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:594
#define CRM_NODE_MEMBER
Definition: cluster.h:44
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:446
unsigned long long crm_peer_seq
Definition: membership.c:64
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:352
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:213
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:468
char * expected
Definition: cluster.h:85
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:965
#define CRM_XS
Definition: logging.h:42
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
Update a node's state and membership information.
Definition: membership.c:1077
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:89
#define crm_err(fmt, args...)
Definition: logging.h:248
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:502
Fencing aka. STONITH.
#define uint32_t
Definition: stdint.in.h:158
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
char * state
Definition: cluster.h:84
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:179
#define U64T
Definition: config.h:754
Wrappers for and extensions to libqb IPC.
int32_t votes
Definition: cluster.h:78
char * uname
Definition: cluster.h:82
uint64_t last_seen
Definition: cluster.h:75
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:270
gboolean crm_is_true(const char *s)
Definition: strings.c:165
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:838
#define safe_str_eq(a, b)
Definition: util.h:72
#define ONLINESTATUS
Definition: util.h:52
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:656
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1122
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:540
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:257
GHashTable * crm_peer_cache
Definition: membership.c:44
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:242
#define crm_info(fmt, args...)
Definition: logging.h:251
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:135
uint64_t flags
Definition: remote.c:156
#define s_if_plural(i)
Definition: membership.c:35
#define int32_t
Definition: stdint.in.h:157
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:513
int crm_remote_peer_cache_size(void)
Definition: membership.c:69