This source file includes the following definitions.
- pcmk__cluster_has_quorum
- pcmk__cluster_set_quorum
- pcmk__cluster_num_remote_nodes
- pcmk__cluster_lookup_remote_node
- pcmk__cluster_forget_remote_node
- remote_state_from_cib
- remote_cache_refresh_helper
- mark_dirty
- is_dirty
- refresh_remote_nodes
- pcmk__cluster_is_node_active
- should_forget_cluster_node
- pcmk__cluster_forget_cluster_node
- count_peer
- pcmk__cluster_num_active_nodes
- destroy_crm_node
- pcmk__cluster_init_node_caches
- pcmk__cluster_destroy_node_caches
- pcmk__cluster_set_status_callback
- pcmk__cluster_set_autoreap
- dump_peer_hash
- hash_find_by_data
- search_cluster_member_cache
- pcmk__search_node_caches
- pcmk__purge_node_from_cache
- remove_conflicting_peer
- pcmk__get_node
- update_peer_uname
- proc2text
- crm_update_peer_proc
- pcmk__update_peer_expected
- update_peer_state_iter
- pcmk__update_peer_state
- pcmk__reap_unseen_nodes
- find_cib_cluster_node
- cluster_node_cib_cache_refresh_helper
- refresh_cluster_node_cib_cache
- pcmk__refresh_node_caches_from_cib
- crm_peer_init
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/param.h>
17 #include <sys/types.h>
18 #include <unistd.h>
19
20 #include <glib.h>
21
22 #include <crm/common/ipc.h>
23 #include <crm/common/xml_internal.h>
24 #include <crm/cluster/internal.h>
25 #include <crm/common/xml.h>
26 #include <crm/stonith-ng.h>
27 #include "crmcluster_private.h"
28
29
30
31
32
33
34
35
36
37 GHashTable *pcmk__peer_cache = NULL;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55 GHashTable *pcmk__remote_peer_cache = NULL;
56
57
58
59
60
61
62
63 static GHashTable *cluster_node_cib_cache = NULL;
64
65 static bool autoreap = true;
66 static bool has_quorum = false;
67
68
69
70 #define set_peer_flags(peer, flags_to_set) do { \
71 (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
72 "Peer", (peer)->name, \
73 (peer)->flags, (flags_to_set), \
74 #flags_to_set); \
75 } while (0)
76
77 #define clear_peer_flags(peer, flags_to_clear) do { \
78 (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
79 LOG_TRACE, \
80 "Peer", (peer)->name, \
81 (peer)->flags, (flags_to_clear), \
82 #flags_to_clear); \
83 } while (0)
84
85 static void update_peer_uname(pcmk__node_status_t *node, const char *uname);
86 static pcmk__node_status_t *find_cib_cluster_node(const char *id,
87 const char *uname);
88
89
90
91
92
93
94
95 bool
96 pcmk__cluster_has_quorum(void)
97 {
98 return has_quorum;
99 }
100
101
102
103
104
105
106
107 void
108 pcmk__cluster_set_quorum(bool quorate)
109 {
110 has_quorum = quorate;
111 }
112
113
114
115
116
117
118
119 unsigned int
120 pcmk__cluster_num_remote_nodes(void)
121 {
122 if (pcmk__remote_peer_cache == NULL) {
123 return 0U;
124 }
125 return g_hash_table_size(pcmk__remote_peer_cache);
126 }
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143 pcmk__node_status_t *
144 pcmk__cluster_lookup_remote_node(const char *node_name)
145 {
146 pcmk__node_status_t *node = NULL;
147 char *node_name_copy = NULL;
148
149 if (node_name == NULL) {
150 errno = EINVAL;
151 return NULL;
152 }
153
154
155
156
157
158
159 node = pcmk__search_node_caches(0, node_name,
160 pcmk__node_search_cluster_member);
161 if ((node != NULL) && (node->xml_id == NULL)) {
162
163
164
165 node_name_copy = strdup(node_name);
166 if (node_name_copy == NULL) {
167 errno = ENOMEM;
168 return NULL;
169 }
170 node_name = node_name_copy;
171 pcmk__cluster_forget_cluster_node(0, node_name);
172 }
173
174
175 node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name);
176 if (node) {
177 free(node_name_copy);
178 return node;
179 }
180
181
182 node = calloc(1, sizeof(pcmk__node_status_t));
183 if (node == NULL) {
184 free(node_name_copy);
185 return NULL;
186 }
187
188
189 set_peer_flags(node, pcmk__node_status_remote);
190 node->xml_id = strdup(node_name);
191 if (node->xml_id == NULL) {
192 free(node);
193 errno = ENOMEM;
194 free(node_name_copy);
195 return NULL;
196 }
197
198
199 g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node);
200 crm_trace("added %s to remote cache", node_name);
201
202
203 update_peer_uname(node, node_name);
204 free(node_name_copy);
205 return node;
206 }
207
208
209
210
211
212
213
214
215
216
217 void
218 pcmk__cluster_forget_remote_node(const char *node_name)
219 {
220
221
222
223 if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) {
224 crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
225 g_hash_table_remove(pcmk__remote_peer_cache, node_name);
226 }
227 }
228
229
230
231
232
233
234
235
236
237
238 static const char *
239 remote_state_from_cib(const xmlNode *node_state)
240 {
241 bool in_ccm = false;
242
243 if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
244 &in_ccm) == pcmk_rc_ok) && in_ccm) {
245 return PCMK_VALUE_MEMBER;
246 }
247 return PCMK__VALUE_LOST;
248 }
249
250
251 struct refresh_data {
252 const char *field;
253 gboolean has_state;
254 };
255
256
257
258
259
260
261
262
263 static void
264 remote_cache_refresh_helper(xmlNode *result, void *user_data)
265 {
266 const struct refresh_data *data = user_data;
267 const char *remote = crm_element_value(result, data->field);
268 const char *state = NULL;
269 pcmk__node_status_t *node;
270
271 CRM_CHECK(remote != NULL, return);
272
273
274 if (data->has_state) {
275 state = remote_state_from_cib(result);
276 }
277
278
279 node = g_hash_table_lookup(pcmk__remote_peer_cache, remote);
280
281 if (node == NULL) {
282
283 node = pcmk__cluster_lookup_remote_node(remote);
284 pcmk__assert(node != NULL);
285 if (state) {
286 pcmk__update_peer_state(__func__, node, state, 0);
287 }
288
289 } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
290
291 clear_peer_flags(node, pcmk__node_status_dirty);
292 if (state) {
293 pcmk__update_peer_state(__func__, node, state, 0);
294 }
295 }
296 }
297
298 static void
299 mark_dirty(gpointer key, gpointer value, gpointer user_data)
300 {
301 set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty);
302 }
303
304 static gboolean
305 is_dirty(gpointer key, gpointer value, gpointer user_data)
306 {
307 const pcmk__node_status_t *node = value;
308
309 return pcmk_is_set(node->flags, pcmk__node_status_dirty);
310 }
311
312
313
314
315
316
317
318 static void
319 refresh_remote_nodes(xmlNode *cib)
320 {
321 struct refresh_data data;
322
323 pcmk__cluster_init_node_caches();
324
325
326
327
328
329 g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);
330
331
332 data.field = PCMK_XA_ID;
333 data.has_state = TRUE;
334 crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
335 remote_cache_refresh_helper, &data);
336
337
338
339
340
341
342
343 data.field = PCMK_XA_VALUE;
344 data.has_state = FALSE;
345 crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
346 remote_cache_refresh_helper, &data);
347 data.field = PCMK_XA_ID;
348 data.has_state = FALSE;
349 crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
350 remote_cache_refresh_helper, &data);
351
352
353 g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
354 }
355
356
357
358
359
360
361
362
363
364
365
366
367 bool
368 pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
369 {
370 const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
371
372 if ((node == NULL) || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
373 return false;
374 }
375
376 switch (cluster_layer) {
377 case pcmk_cluster_layer_corosync:
378 #if SUPPORT_COROSYNC
379 return pcmk__corosync_is_peer_active(node);
380 #else
381 break;
382 #endif
383 default:
384 break;
385 }
386
387 crm_err("Unhandled cluster layer: %s",
388 pcmk_cluster_layer_text(cluster_layer));
389 return false;
390 }
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418 static gboolean
419 should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
420 {
421 pcmk__node_status_t *node = value;
422 pcmk__node_status_t *search = user_data;
423
424 if (search == NULL) {
425 return FALSE;
426 }
427 if ((search->cluster_layer_id != 0)
428 && (node->cluster_layer_id != search->cluster_layer_id)) {
429 return FALSE;
430 }
431 if ((search->cluster_layer_id == 0)
432 && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) {
433
434 return FALSE;
435 }
436 if (pcmk__cluster_is_node_active(value)) {
437 return FALSE;
438 }
439
440 crm_info("Removing node with name %s and cluster layer ID %" PRIu32
441 " from membership cache",
442 pcmk__s(node->name, "(unknown)"), node->cluster_layer_id);
443 return TRUE;
444 }
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464 void
465 pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
466 {
467 pcmk__node_status_t search = { 0, };
468 char *criterion = NULL;
469 guint matches = 0;
470
471 if (pcmk__peer_cache == NULL) {
472 crm_trace("Membership cache not initialized, ignoring removal request");
473 return;
474 }
475
476 search.cluster_layer_id = id;
477 search.name = pcmk__str_copy(node_name);
478
479 if (id > 0) {
480 criterion = crm_strdup_printf("cluster layer ID %" PRIu32, id);
481
482 } else if (node_name != NULL) {
483 criterion = crm_strdup_printf("name %s", node_name);
484 }
485
486 matches = g_hash_table_foreach_remove(pcmk__peer_cache,
487 should_forget_cluster_node, &search);
488 if (matches > 0) {
489 if (criterion != NULL) {
490 crm_notice("Removed %u inactive node%s with %s from the membership "
491 "cache",
492 matches, pcmk__plural_s(matches), criterion);
493 } else {
494 crm_notice("Removed all (%u) inactive cluster nodes from the "
495 "membership cache",
496 matches);
497 }
498
499 } else {
500 crm_info("No inactive cluster nodes%s%s to remove from the membership "
501 "cache",
502 ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
503 }
504
505 free(search.name);
506 free(criterion);
507 }
508
509 static void
510 count_peer(gpointer key, gpointer value, gpointer user_data)
511 {
512 unsigned int *count = user_data;
513 pcmk__node_status_t *node = value;
514
515 if (pcmk__cluster_is_node_active(node)) {
516 *count = *count + 1;
517 }
518 }
519
520
521
522
523
524
525
526
527
528
529 unsigned int
530 pcmk__cluster_num_active_nodes(void)
531 {
532 unsigned int count = 0;
533
534 if (pcmk__peer_cache != NULL) {
535 g_hash_table_foreach(pcmk__peer_cache, count_peer, &count);
536 }
537 return count;
538 }
539
540 static void
541 destroy_crm_node(gpointer data)
542 {
543 pcmk__node_status_t *node = data;
544
545 crm_trace("Destroying entry for node %" PRIu32 ": %s",
546 node->cluster_layer_id, node->name);
547
548 free(node->name);
549 free(node->state);
550 free(node->xml_id);
551 free(node->user_data);
552 free(node->expected);
553 free(node->conn_host);
554 free(node);
555 }
556
557
558
559
560
561 void
562 pcmk__cluster_init_node_caches(void)
563 {
564 if (pcmk__peer_cache == NULL) {
565 pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node);
566 }
567
568 if (pcmk__remote_peer_cache == NULL) {
569 pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
570 }
571
572 if (cluster_node_cib_cache == NULL) {
573 cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
574 }
575 }
576
577
578
579
580
581 void
582 pcmk__cluster_destroy_node_caches(void)
583 {
584 if (pcmk__peer_cache != NULL) {
585 crm_trace("Destroying peer cache with %d members",
586 g_hash_table_size(pcmk__peer_cache));
587 g_hash_table_destroy(pcmk__peer_cache);
588 pcmk__peer_cache = NULL;
589 }
590
591 if (pcmk__remote_peer_cache != NULL) {
592 crm_trace("Destroying remote peer cache with %d members",
593 pcmk__cluster_num_remote_nodes());
594 g_hash_table_destroy(pcmk__remote_peer_cache);
595 pcmk__remote_peer_cache = NULL;
596 }
597
598 if (cluster_node_cib_cache != NULL) {
599 crm_trace("Destroying configured cluster node cache with %d members",
600 g_hash_table_size(cluster_node_cib_cache));
601 g_hash_table_destroy(cluster_node_cib_cache);
602 cluster_node_cib_cache = NULL;
603 }
604 }
605
606 static void (*peer_status_callback)(enum pcmk__node_update,
607 pcmk__node_status_t *,
608 const void *) = NULL;
609
610
611
612
613
614
615
616
617
618
619 void
620 pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
621 pcmk__node_status_t *,
622 const void *))
623 {
624
625 peer_status_callback = dispatch;
626 }
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642 void
643 pcmk__cluster_set_autoreap(bool enable)
644 {
645 autoreap = enable;
646 }
647
648 static void
649 dump_peer_hash(int level, const char *caller)
650 {
651 GHashTableIter iter;
652 const char *id = NULL;
653 pcmk__node_status_t *node = NULL;
654
655 g_hash_table_iter_init(&iter, pcmk__peer_cache);
656 while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
657 do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s",
658 caller, node->cluster_layer_id, node->name, node, id);
659 }
660 }
661
662 static gboolean
663 hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
664 {
665 return value == user_data;
666 }
667
668
669
670
671
672
673
674
675
676
677
678
679 static pcmk__node_status_t *
680 search_cluster_member_cache(unsigned int id, const char *uname,
681 const char *uuid)
682 {
683 GHashTableIter iter;
684 pcmk__node_status_t *node = NULL;
685 pcmk__node_status_t *by_id = NULL;
686 pcmk__node_status_t *by_name = NULL;
687
688 pcmk__assert((id > 0) || (uname != NULL));
689
690 pcmk__cluster_init_node_caches();
691
692 if (uname != NULL) {
693 g_hash_table_iter_init(&iter, pcmk__peer_cache);
694 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
695 if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
696 crm_trace("Name match: %s", node->name);
697 by_name = node;
698 break;
699 }
700 }
701 }
702
703 if (id > 0) {
704 g_hash_table_iter_init(&iter, pcmk__peer_cache);
705 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
706 if (node->cluster_layer_id == id) {
707 crm_trace("ID match: %" PRIu32, node->cluster_layer_id);
708 by_id = node;
709 break;
710 }
711 }
712
713 } else if (uuid != NULL) {
714 g_hash_table_iter_init(&iter, pcmk__peer_cache);
715 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
716 if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) {
717 crm_trace("UUID match: %s", node->xml_id);
718 by_id = node;
719 break;
720 }
721 }
722 }
723
724 node = by_id;
725 if(by_id == by_name) {
726
727 crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
728
729 } else if(by_id == NULL && by_name) {
730 crm_trace("Only one: %p for %u/%s", by_name, id, uname);
731
732 if (id && by_name->cluster_layer_id) {
733 dump_peer_hash(LOG_WARNING, __func__);
734 crm_crit("Nodes %u and %" PRIu32 " share the same name '%s'",
735 id, by_name->cluster_layer_id, uname);
736 node = NULL;
737
738 } else {
739 node = by_name;
740 }
741
742 } else if(by_name == NULL && by_id) {
743 crm_trace("Only one: %p for %u/%s", by_id, id, uname);
744
745 if ((uname != NULL) && (by_id->name != NULL)) {
746 dump_peer_hash(LOG_WARNING, __func__);
747 crm_crit("Nodes '%s' and '%s' share the same cluster nodeid %u: "
748 "assuming '%s' is correct",
749 uname, by_id->name, id, uname);
750 }
751
752 } else if ((uname != NULL) && (by_id->name != NULL)) {
753 if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
754 crm_notice("Node '%s' has changed its cluster layer ID "
755 "from %" PRIu32 " to %" PRIu32,
756 by_id->name, by_name->cluster_layer_id,
757 by_id->cluster_layer_id);
758 g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
759 by_name);
760
761 } else {
762 crm_warn("Nodes '%s' and '%s' share the same cluster nodeid: %u %s",
763 by_id->name, by_name->name, id, uname);
764 dump_peer_hash(LOG_INFO, __func__);
765 crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
766 TRUE);
767 }
768
769 } else if ((id > 0) && (by_name->cluster_layer_id > 0)) {
770 crm_warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: '%s'",
771 by_id->cluster_layer_id, by_name->cluster_layer_id, uname);
772
773 } else {
774
775
776
777
778
779
780 dump_peer_hash(LOG_DEBUG, __func__);
781
782 crm_info("Merging %p into %p", by_name, by_id);
783 g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
784 by_name);
785 }
786
787 return node;
788 }
789
790
791
792
793
794
795
796
797
798
799
800 pcmk__node_status_t *
801 pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
802 {
803 pcmk__node_status_t *node = NULL;
804
805 pcmk__assert((id > 0) || (uname != NULL));
806
807 pcmk__cluster_init_node_caches();
808
809 if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
810 node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
811 }
812
813 if ((node == NULL)
814 && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
815
816 node = search_cluster_member_cache(id, uname, NULL);
817 }
818
819 if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
820 char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
821
822 node = find_cib_cluster_node(id_str, uname);
823 free(id_str);
824 }
825
826 return node;
827 }
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843 void
844 pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
845 {
846 char *node_name_copy = NULL;
847
848 if ((node_name == NULL) && (node_id == 0U)) {
849 return;
850 }
851
852
853 if ((node_name != NULL)
854 && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) {
855
856
857
858 node_name_copy = pcmk__str_copy(node_name);
859 node_name = node_name_copy;
860
861 crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
862 g_hash_table_remove(pcmk__remote_peer_cache, node_name);
863 }
864
865 pcmk__cluster_forget_cluster_node(node_id, node_name);
866 free(node_name_copy);
867 }
868
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Remove inactive peer cache entries whose name conflicts with a node
 *
 * An entry conflicts if it has a nonzero cluster layer ID different from the
 * given node's and the same name, compared case-insensitively. Nothing is
 * removed unless the given node has both an ID and a name and
 * pcmk__corosync_has_nodelist() is true.
 *
 * \param[in] node  Node whose name should be unique in the cache
 *
 * \return Number of entries removed
 */
static guint
remove_conflicting_peer(pcmk__node_status_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    gpointer value = NULL;

    if ((node->cluster_layer_id == 0) || (node->name == NULL)
        || !pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, &value)) {
        pcmk__node_status_t *existing_node = value;

        // Skip entries that do not conflict, and conflicting ones still active
        if ((existing_node->cluster_layer_id == 0)
            || (existing_node->cluster_layer_id == node->cluster_layer_id)
            || !pcmk__str_eq(existing_node->name, node->name, pcmk__str_casei)
            || pcmk__cluster_is_node_active(existing_node)) {
            continue;
        }

        crm_warn("Removing cached offline node %" PRIu32 "/%s which has "
                 "conflicting name with %" PRIu32,
                 existing_node->cluster_layer_id, existing_node->name,
                 node->cluster_layer_id);

        g_hash_table_iter_remove(&iter);
        matches++;
    }

    return matches;
}
#endif
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926 pcmk__node_status_t *
927 pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
928 uint32_t flags)
929 {
930 pcmk__node_status_t *node = NULL;
931 char *uname_lookup = NULL;
932
933 pcmk__assert((id > 0) || (uname != NULL));
934
935 pcmk__cluster_init_node_caches();
936
937
938 if (pcmk_is_set(flags, pcmk__node_search_remote)) {
939 node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
940 if (node != NULL) {
941 return node;
942 }
943 }
944
945 if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
946 return NULL;
947 }
948
949 node = search_cluster_member_cache(id, uname, xml_id);
950
951
952
953 if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) {
954 uname_lookup = pcmk__cluster_node_name(id);
955 }
956
957 if (uname_lookup) {
958 uname = uname_lookup;
959 crm_trace("Inferred a name of '%s' for node %u", uname, id);
960
961
962 if (node == NULL) {
963 node = search_cluster_member_cache(id, uname, xml_id);
964 }
965 }
966
967 if (node == NULL) {
968 char *uniqueid = crm_generate_uuid();
969
970 node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));
971
972 crm_info("Created entry %s/%p for node %s/%u (%d total)",
973 uniqueid, node, uname, id,
974 1 + g_hash_table_size(pcmk__peer_cache));
975 g_hash_table_replace(pcmk__peer_cache, uniqueid, node);
976 }
977
978 if ((id > 0) && (uname != NULL)
979 && ((node->cluster_layer_id == 0) || (node->name == NULL))) {
980 crm_info("Node %u is now known as %s", id, uname);
981 }
982
983 if ((id > 0) && (node->cluster_layer_id == 0)) {
984 node->cluster_layer_id = id;
985 }
986
987 if ((uname != NULL) && (node->name == NULL)) {
988 update_peer_uname(node, uname);
989 }
990
991 if ((xml_id == NULL) && (node->xml_id == NULL)) {
992 xml_id = pcmk__cluster_node_uuid(node);
993 if (xml_id == NULL) {
994 crm_debug("Cannot obtain an XML ID for node %s[%u] at this time",
995 node->name, id);
996 } else {
997 crm_info("Node %s[%u] has XML ID %s", node->name, id, xml_id);
998 }
999 }
1000
1001 free(uname_lookup);
1002
1003 return node;
1004 }
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017 static void
1018 update_peer_uname(pcmk__node_status_t *node, const char *uname)
1019 {
1020 CRM_CHECK(uname != NULL,
1021 crm_err("Bug: can't update node name without name"); return);
1022 CRM_CHECK(node != NULL,
1023 crm_err("Bug: can't update node name to %s without node", uname);
1024 return);
1025
1026 if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) {
1027 crm_debug("Node name '%s' did not change", uname);
1028 return;
1029 }
1030
1031 for (const char *c = uname; *c; ++c) {
1032 if ((*c >= 'A') && (*c <= 'Z')) {
1033 crm_warn("Node names with capitals are discouraged, consider changing '%s'",
1034 uname);
1035 break;
1036 }
1037 }
1038
1039 pcmk__str_update(&node->name, uname);
1040
1041 if (peer_status_callback != NULL) {
1042 peer_status_callback(pcmk__node_update_name, node, NULL);
1043 }
1044
1045 #if SUPPORT_COROSYNC
1046 if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
1047 && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1048
1049 remove_conflicting_peer(node);
1050 }
1051 #endif
1052 }
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062 static inline const char *
1063 proc2text(enum crm_proc_flag proc)
1064 {
1065 const char *text = "unknown";
1066
1067 switch (proc) {
1068 case crm_proc_none:
1069 text = "none";
1070 break;
1071 case crm_proc_cpg:
1072 text = "corosync-cpg";
1073 break;
1074 }
1075 return text;
1076 }
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094 pcmk__node_status_t *
1095 crm_update_peer_proc(const char *source, pcmk__node_status_t *node,
1096 uint32_t flag, const char *status)
1097 {
1098 uint32_t last = 0;
1099 gboolean changed = FALSE;
1100
1101 CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
1102 source, proc2text(flag), status);
1103 return NULL);
1104
1105
1106 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1107 return node;
1108 }
1109
1110 last = node->processes;
1111 if (status == NULL) {
1112 node->processes = flag;
1113 if (node->processes != last) {
1114 changed = TRUE;
1115 }
1116
1117 } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1118 if ((node->processes & flag) != flag) {
1119 node->processes = pcmk__set_flags_as(__func__, __LINE__,
1120 LOG_TRACE, "Peer process",
1121 node->name, node->processes,
1122 flag, "processes");
1123 changed = TRUE;
1124 }
1125
1126 } else if (node->processes & flag) {
1127 node->processes = pcmk__clear_flags_as(__func__, __LINE__,
1128 LOG_TRACE, "Peer process",
1129 node->name, node->processes,
1130 flag, "processes");
1131 changed = TRUE;
1132 }
1133
1134 if (changed) {
1135 if (status == NULL && flag <= crm_proc_none) {
1136 crm_info("%s: Node %s[%" PRIu32 "] - all processes are now offline",
1137 source, node->name, node->cluster_layer_id);
1138 } else {
1139 crm_info("%s: Node %s[%" PRIu32 "] - %s is now %s",
1140 source, node->name, node->cluster_layer_id,
1141 proc2text(flag), status);
1142 }
1143
1144 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1145 node->when_online = time(NULL);
1146
1147 } else {
1148 node->when_online = 0;
1149 }
1150
1151
1152
1153
1154 if (peer_status_callback != NULL) {
1155 peer_status_callback(pcmk__node_update_processes, node, &last);
1156 }
1157
1158
1159
1160
1161 if (pcmk__peer_cache == NULL) {
1162 return NULL;
1163 }
1164
1165 if (autoreap) {
1166 const char *peer_state = NULL;
1167
1168 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1169 peer_state = PCMK_VALUE_MEMBER;
1170 } else {
1171 peer_state = PCMK__VALUE_LOST;
1172 }
1173 node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1174 }
1175 } else {
1176 crm_trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)",
1177 source, node->name, node->cluster_layer_id, proc2text(flag),
1178 status);
1179 }
1180 return node;
1181 }
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191 void
1192 pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
1193 const char *expected)
1194 {
1195 char *last = NULL;
1196 gboolean changed = FALSE;
1197
1198 CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
1199 return);
1200
1201
1202 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1203 return;
1204 }
1205
1206 last = node->expected;
1207 if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1208 node->expected = strdup(expected);
1209 changed = TRUE;
1210 }
1211
1212 if (changed) {
1213 crm_info("%s: Node %s[%" PRIu32 "] - expected state is now %s (was %s)",
1214 source, node->name, node->cluster_layer_id, expected, last);
1215 free(last);
1216 } else {
1217 crm_trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)",
1218 source, node->name, node->cluster_layer_id, expected);
1219 }
1220 }
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238 static pcmk__node_status_t *
1239 update_peer_state_iter(const char *source, pcmk__node_status_t *node,
1240 const char *state, uint64_t membership,
1241 GHashTableIter *iter)
1242 {
1243 gboolean is_member;
1244
1245 CRM_CHECK(node != NULL,
1246 crm_err("Could not set state for unknown host to %s "
1247 QB_XS " source=%s", state, source);
1248 return NULL);
1249
1250 is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none);
1251 if (is_member) {
1252 node->when_lost = 0;
1253 if (membership) {
1254 node->membership_id = membership;
1255 }
1256 }
1257
1258 if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1259 char *last = node->state;
1260
1261 if (is_member) {
1262 node->when_member = time(NULL);
1263
1264 } else {
1265 node->when_member = 0;
1266 }
1267
1268 node->state = strdup(state);
1269 crm_notice("Node %s state is now %s " QB_XS
1270 " nodeid=%" PRIu32 " previous=%s source=%s",
1271 node->name, state, node->cluster_layer_id,
1272 pcmk__s(last, "unknown"), source);
1273 if (peer_status_callback != NULL) {
1274 peer_status_callback(pcmk__node_update_state, node, last);
1275 }
1276 free(last);
1277
1278 if (autoreap && !is_member
1279 && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1280
1281
1282
1283
1284 if(iter) {
1285 crm_notice("Purged 1 peer with cluster layer ID %" PRIu32
1286 "and/or name=%s from the membership cache",
1287 node->cluster_layer_id, node->name);
1288 g_hash_table_iter_remove(iter);
1289
1290 } else {
1291 pcmk__cluster_forget_cluster_node(node->cluster_layer_id,
1292 node->name);
1293 }
1294 node = NULL;
1295 }
1296
1297 } else {
1298 crm_trace("Node %s state is unchanged (%s) " QB_XS
1299 " nodeid=%" PRIu32 " source=%s",
1300 node->name, state, node->cluster_layer_id, source);
1301 }
1302 return node;
1303 }
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320 pcmk__node_status_t *
1321 pcmk__update_peer_state(const char *source, pcmk__node_status_t *node,
1322 const char *state, uint64_t membership)
1323 {
1324 return update_peer_state_iter(source, node, state, membership, NULL);
1325 }
1326
1327
1328
1329
1330
1331
1332
1333 void
1334 pcmk__reap_unseen_nodes(uint64_t membership)
1335 {
1336 GHashTableIter iter;
1337 pcmk__node_status_t *node = NULL;
1338
1339 crm_trace("Reaping unseen nodes...");
1340 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1341 while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1342 if (node->membership_id != membership) {
1343 if (node->state) {
1344
1345
1346
1347 update_peer_state_iter(__func__, node, PCMK__VALUE_LOST,
1348 membership, &iter);
1349
1350 } else {
1351 crm_info("State of node %s[%" PRIu32 "] is still unknown",
1352 node->name, node->cluster_layer_id);
1353 }
1354 }
1355 }
1356 }
1357
1358 static pcmk__node_status_t *
1359 find_cib_cluster_node(const char *id, const char *uname)
1360 {
1361 GHashTableIter iter;
1362 pcmk__node_status_t *node = NULL;
1363 pcmk__node_status_t *by_id = NULL;
1364 pcmk__node_status_t *by_name = NULL;
1365
1366 if (uname) {
1367 g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1368 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1369 if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
1370 crm_trace("Name match: %s = %p", node->name, node);
1371 by_name = node;
1372 break;
1373 }
1374 }
1375 }
1376
1377 if (id) {
1378 g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1379 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1380 if (pcmk__str_eq(node->xml_id, id, pcmk__str_casei)) {
1381 crm_trace("ID match: %s= %p", id, node);
1382 by_id = node;
1383 break;
1384 }
1385 }
1386 }
1387
1388 node = by_id;
1389 if (by_id == by_name) {
1390
1391 crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1392
1393 } else if (by_id == NULL && by_name) {
1394 crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1395
1396 if (id) {
1397 node = NULL;
1398
1399 } else {
1400 node = by_name;
1401 }
1402
1403 } else if (by_name == NULL && by_id) {
1404 crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1405
1406 if (uname) {
1407 node = NULL;
1408 }
1409
1410 } else if ((uname != NULL) && (by_id->name != NULL)
1411 && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
1412
1413
1414
1415 } else if ((id != NULL) && (by_name->xml_id != NULL)
1416 && pcmk__str_eq(id, by_name->xml_id, pcmk__str_casei)) {
1417
1418
1419 node = by_name;
1420
1421 } else {
1422 node = NULL;
1423 }
1424
1425 if (node == NULL) {
1426 crm_debug("Couldn't find node%s%s%s%s",
1427 id? " " : "",
1428 id? id : "",
1429 uname? " with name " : "",
1430 uname? uname : "");
1431 }
1432
1433 return node;
1434 }
1435
1436 static void
1437 cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1438 {
1439 const char *id = crm_element_value(xml_node, PCMK_XA_ID);
1440 const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
1441 pcmk__node_status_t * node = NULL;
1442
1443 CRM_CHECK(id != NULL && uname !=NULL, return);
1444 node = find_cib_cluster_node(id, uname);
1445
1446 if (node == NULL) {
1447 char *uniqueid = crm_generate_uuid();
1448
1449 node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));
1450
1451 node->name = pcmk__str_copy(uname);
1452 node->xml_id = pcmk__str_copy(id);
1453
1454 g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
1455
1456 } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
1457 pcmk__str_update(&node->name, uname);
1458
1459
1460 clear_peer_flags(node, pcmk__node_status_dirty);
1461 }
1462
1463 }
1464
1465 static void
1466 refresh_cluster_node_cib_cache(xmlNode *cib)
1467 {
1468 pcmk__cluster_init_node_caches();
1469
1470 g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
1471
1472 crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
1473 cluster_node_cib_cache_refresh_helper, NULL);
1474
1475
1476 g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
1477 }
1478
1479 void
1480 pcmk__refresh_node_caches_from_cib(xmlNode *cib)
1481 {
1482 refresh_remote_nodes(cib);
1483 refresh_cluster_node_cib_cache(cib);
1484 }
1485
1486
1487
1488
1489 #include <crm/cluster/compat.h>
1490
/*!
 * \brief Create the node caches if they do not exist yet
 *
 * Compatibility wrapper around pcmk__cluster_init_node_caches().
 */
void
crm_peer_init(void)
{
    pcmk__cluster_init_node_caches();
}
1496
1497
1498