This source file includes the following definitions.
- pcmk__cluster_has_quorum
- pcmk__cluster_set_quorum
- pcmk__cluster_num_remote_nodes
- pcmk__cluster_lookup_remote_node
- pcmk__cluster_forget_remote_node
- remote_state_from_cib
- remote_cache_refresh_helper
- mark_dirty
- is_dirty
- refresh_remote_nodes
- pcmk__cluster_is_node_active
- should_forget_cluster_node
- pcmk__cluster_forget_cluster_node
- count_peer
- pcmk__cluster_num_active_nodes
- destroy_crm_node
- pcmk__cluster_init_node_caches
- pcmk__cluster_destroy_node_caches
- pcmk__cluster_set_status_callback
- pcmk__cluster_set_autoreap
- dump_peer_hash
- hash_find_by_data
- search_cluster_member_cache
- pcmk__search_node_caches
- pcmk__purge_node_from_cache
- remove_conflicting_peer
- pcmk__get_node
- update_peer_uname
- proc2text
- crm_update_peer_proc
- pcmk__update_peer_expected
- update_peer_state_iter
- pcmk__update_peer_state
- pcmk__reap_unseen_nodes
- find_cib_cluster_node
- cluster_node_cib_cache_refresh_helper
- refresh_cluster_node_cib_cache
- pcmk__refresh_node_caches_from_cib
- crm_peer_init
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <inttypes.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/param.h>
17 #include <sys/types.h>
18 #include <unistd.h>
19
20 #include <glib.h>
21
22 #include <crm/common/ipc.h>
23 #include <crm/common/xml_internal.h>
24 #include <crm/cluster/internal.h>
25 #include <crm/common/xml.h>
26 #include <crm/stonith-ng.h>
27 #include "crmcluster_private.h"
28
29
30
31
32
33
34
35
36
/* Cache of cluster-layer peers. It is created by
 * pcmk__cluster_init_node_caches() with free() as the key destructor and
 * destroy_crm_node() as the value destructor, populated by pcmk__get_node()
 * (entries are keyed by a generated UUID string), and reset to NULL by
 * pcmk__cluster_destroy_node_caches().
 */
37 GHashTable *pcmk__peer_cache = NULL;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/* Cache of Pacemaker Remote nodes. Entries are added by
 * pcmk__cluster_lookup_remote_node(), which uses the entry's own xml_id string
 * as the hash key; that is why pcmk__cluster_init_node_caches() creates this
 * table without a key destructor (destroy_crm_node() frees xml_id together
 * with the entry).
 */
55 GHashTable *pcmk__remote_peer_cache = NULL;
56
57
58
59
60
61
62
/* Cache of cluster nodes found in the CIB. It is refreshed by
 * refresh_cluster_node_cib_cache() and searched by find_cib_cluster_node();
 * entries are keyed by a generated UUID string.
 */
63 static GHashTable *cluster_node_cib_cache = NULL;
64
/* When true, crm_update_peer_proc() derives a membership state from process
 * changes, and update_peer_state_iter() removes non-remote nodes from the peer
 * cache once their state is no longer "member". Changed only through
 * pcmk__cluster_set_autoreap().
 */
65 static bool autoreap = true;
/* Quorum status as last recorded by pcmk__cluster_set_quorum() */
66 static bool has_quorum = false;
67
68
69
/* Set flag bits on a peer entry: stores the result of pcmk__set_flags_as()
 * back into (peer)->flags, passing along the calling function and line,
 * LOG_TRACE as the level, "Peer" and the peer's name as the target description,
 * and the stringified flag expression as the flag name.
 */
70 #define set_peer_flags(peer, flags_to_set) do { \
71 (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
72 "Peer", (peer)->name, \
73 (peer)->flags, (flags_to_set), \
74 #flags_to_set); \
75 } while (0)
76
/* Clear flag bits on a peer entry: stores the result of pcmk__clear_flags_as()
 * back into (peer)->flags, passing along the calling function and line,
 * LOG_TRACE as the level, "Peer" and the peer's name as the target description,
 * and the stringified flag expression as the flag name.
 */
77 #define clear_peer_flags(peer, flags_to_clear) do { \
78 (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
79 LOG_TRACE, \
80 "Peer", (peer)->name, \
81 (peer)->flags, (flags_to_clear), \
82 #flags_to_clear); \
83 } while (0)
84
/* Forward declarations of helpers that are defined later in this file but
 * called by functions that appear before their definitions.
 */
85 static void update_peer_uname(pcmk__node_status_t *node, const char *uname);
86 static pcmk__node_status_t *find_cib_cluster_node(const char *id,
87 const char *uname);
88
89
90
91
92
93
94
95 bool
96 pcmk__cluster_has_quorum(void)
97 {
98 return has_quorum;
99 }
100
101
102
103
104
105
106
107 void
108 pcmk__cluster_set_quorum(bool quorate)
109 {
110 has_quorum = quorate;
111 }
112
113
114
115
116
117
118
119 unsigned int
120 pcmk__cluster_num_remote_nodes(void)
121 {
122 if (pcmk__remote_peer_cache == NULL) {
123 return 0U;
124 }
125 return g_hash_table_size(pcmk__remote_peer_cache);
126 }
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143 pcmk__node_status_t *
144 pcmk__cluster_lookup_remote_node(const char *node_name)
145 {
146 pcmk__node_status_t *node = NULL;
147 char *node_name_copy = NULL;
148
149 if (node_name == NULL) {
150 errno = EINVAL;
151 return NULL;
152 }
153
154
155
156
157
158
159 node = pcmk__search_node_caches(0, node_name, NULL,
160 pcmk__node_search_cluster_member);
161 if ((node != NULL)
162 && ((node->xml_id == NULL)
163
164
165
166 || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) {
167
168
169
170
171 node_name_copy = strdup(node_name);
172 if (node_name_copy == NULL) {
173 errno = ENOMEM;
174 return NULL;
175 }
176 node_name = node_name_copy;
177 pcmk__cluster_forget_cluster_node(0, node_name);
178 }
179
180
181 node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name);
182 if (node) {
183 free(node_name_copy);
184 return node;
185 }
186
187
188 node = calloc(1, sizeof(pcmk__node_status_t));
189 if (node == NULL) {
190 free(node_name_copy);
191 return NULL;
192 }
193
194
195 set_peer_flags(node, pcmk__node_status_remote);
196 node->xml_id = strdup(node_name);
197 if (node->xml_id == NULL) {
198 free(node);
199 errno = ENOMEM;
200 free(node_name_copy);
201 return NULL;
202 }
203
204
205 g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node);
206 crm_trace("added %s to remote cache", node_name);
207
208
209 update_peer_uname(node, node_name);
210 free(node_name_copy);
211 return node;
212 }
213
214
215
216
217
218
219
220
221
222
223 void
224 pcmk__cluster_forget_remote_node(const char *node_name)
225 {
226
227
228
229 if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) {
230 crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
231 g_hash_table_remove(pcmk__remote_peer_cache, node_name);
232 }
233 }
234
235
236
237
238
239
240
241
242
243
244 static const char *
245 remote_state_from_cib(const xmlNode *node_state)
246 {
247 bool in_ccm = false;
248
249 if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
250 &in_ccm) == pcmk_rc_ok) && in_ccm) {
251 return PCMK_VALUE_MEMBER;
252 }
253 return PCMK__VALUE_LOST;
254 }
255
256
/* User data passed to remote_cache_refresh_helper() for each XPath result */
257 struct refresh_data {
/* Name of the XML attribute whose value is used as the remote node name */
258 const char *field;
/* If true, the node state is also read from the result element */
259 gboolean has_state;
260 };
261
262
263
264
265
266
267
268
269 static void
270 remote_cache_refresh_helper(xmlNode *result, void *user_data)
271 {
272 const struct refresh_data *data = user_data;
273 const char *remote = crm_element_value(result, data->field);
274 const char *state = NULL;
275 pcmk__node_status_t *node;
276
277 CRM_CHECK(remote != NULL, return);
278
279
280 if (data->has_state) {
281 state = remote_state_from_cib(result);
282 }
283
284
285 node = g_hash_table_lookup(pcmk__remote_peer_cache, remote);
286
287 if (node == NULL) {
288
289 node = pcmk__cluster_lookup_remote_node(remote);
290 pcmk__assert(node != NULL);
291 if (state) {
292 pcmk__update_peer_state(__func__, node, state, 0);
293 }
294
295 } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
296
297 clear_peer_flags(node, pcmk__node_status_dirty);
298 if (state) {
299 pcmk__update_peer_state(__func__, node, state, 0);
300 }
301 }
302 }
303
304 static void
305 mark_dirty(gpointer key, gpointer value, gpointer user_data)
306 {
307 set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty);
308 }
309
310 static gboolean
311 is_dirty(gpointer key, gpointer value, gpointer user_data)
312 {
313 const pcmk__node_status_t *node = value;
314
315 return pcmk_is_set(node->flags, pcmk__node_status_dirty);
316 }
317
318
319
320
321
322
323
324 static void
325 refresh_remote_nodes(xmlNode *cib)
326 {
327 struct refresh_data data;
328
329 pcmk__cluster_init_node_caches();
330
331
332
333
334
335 g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL);
336
337
338 data.field = PCMK_XA_ID;
339 data.has_state = TRUE;
340 pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_STATUS,
341 remote_cache_refresh_helper, &data);
342
343
344
345
346
347
348
349 data.field = PCMK_XA_VALUE;
350 data.has_state = FALSE;
351 pcmk__xpath_foreach_result(cib->doc, PCMK__XP_GUEST_NODE_CONFIG,
352 remote_cache_refresh_helper, &data);
353 data.field = PCMK_XA_ID;
354 data.has_state = FALSE;
355 pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_CONFIG,
356 remote_cache_refresh_helper, &data);
357
358
359 g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL);
360 }
361
362
363
364
365
366
367
368
369
370
371
372
373 bool
374 pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
375 {
376 const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
377
378 if ((node == NULL) || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
379 return false;
380 }
381
382 switch (cluster_layer) {
383 case pcmk_cluster_layer_corosync:
384 #if SUPPORT_COROSYNC
385 return pcmk__corosync_is_peer_active(node);
386 #else
387 break;
388 #endif
389 default:
390 break;
391 }
392
393 crm_err("Unhandled cluster layer: %s",
394 pcmk_cluster_layer_text(cluster_layer));
395 return false;
396 }
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424 static gboolean
425 should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
426 {
427 pcmk__node_status_t *node = value;
428 pcmk__node_status_t *search = user_data;
429
430 if (search == NULL) {
431 return FALSE;
432 }
433 if ((search->cluster_layer_id != 0)
434 && (node->cluster_layer_id != search->cluster_layer_id)) {
435 return FALSE;
436 }
437 if ((search->cluster_layer_id == 0)
438 && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) {
439
440 return FALSE;
441 }
442 if (pcmk__cluster_is_node_active(value)) {
443 return FALSE;
444 }
445
446 crm_info("Removing node with name %s and cluster layer ID %" PRIu32
447 " from membership cache",
448 pcmk__s(node->name, "(unknown)"), node->cluster_layer_id);
449 return TRUE;
450 }
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470 void
471 pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
472 {
473 pcmk__node_status_t search = { 0, };
474 char *criterion = NULL;
475 guint matches = 0;
476
477 if (pcmk__peer_cache == NULL) {
478 crm_trace("Membership cache not initialized, ignoring removal request");
479 return;
480 }
481
482 search.cluster_layer_id = id;
483 search.name = pcmk__str_copy(node_name);
484
485 if (id > 0) {
486 criterion = crm_strdup_printf("cluster layer ID %" PRIu32, id);
487
488 } else if (node_name != NULL) {
489 criterion = crm_strdup_printf("name %s", node_name);
490 }
491
492 matches = g_hash_table_foreach_remove(pcmk__peer_cache,
493 should_forget_cluster_node, &search);
494 if (matches > 0) {
495 if (criterion != NULL) {
496 crm_notice("Removed %u inactive node%s with %s from the membership "
497 "cache",
498 matches, pcmk__plural_s(matches), criterion);
499 } else {
500 crm_notice("Removed all (%u) inactive cluster nodes from the "
501 "membership cache",
502 matches);
503 }
504
505 } else {
506 crm_info("No inactive cluster nodes%s%s to remove from the membership "
507 "cache",
508 ((criterion != NULL)? " with " : ""), pcmk__s(criterion, ""));
509 }
510
511 free(search.name);
512 free(criterion);
513 }
514
515 static void
516 count_peer(gpointer key, gpointer value, gpointer user_data)
517 {
518 unsigned int *count = user_data;
519 pcmk__node_status_t *node = value;
520
521 if (pcmk__cluster_is_node_active(node)) {
522 *count = *count + 1;
523 }
524 }
525
526
527
528
529
530
531
532
533
534
535 unsigned int
536 pcmk__cluster_num_active_nodes(void)
537 {
538 unsigned int count = 0;
539
540 if (pcmk__peer_cache != NULL) {
541 g_hash_table_foreach(pcmk__peer_cache, count_peer, &count);
542 }
543 return count;
544 }
545
546 static void
547 destroy_crm_node(gpointer data)
548 {
549 pcmk__node_status_t *node = data;
550
551 crm_trace("Destroying entry for node %" PRIu32 ": %s",
552 node->cluster_layer_id, node->name);
553
554 free(node->name);
555 free(node->state);
556 free(node->xml_id);
557 free(node->user_data);
558 free(node->expected);
559 free(node->conn_host);
560 free(node);
561 }
562
563
564
565
566
567 void
568 pcmk__cluster_init_node_caches(void)
569 {
570 if (pcmk__peer_cache == NULL) {
571 pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node);
572 }
573
574 if (pcmk__remote_peer_cache == NULL) {
575 pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
576 }
577
578 if (cluster_node_cib_cache == NULL) {
579 cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
580 }
581 }
582
583
584
585
586
587 void
588 pcmk__cluster_destroy_node_caches(void)
589 {
590 if (pcmk__peer_cache != NULL) {
591 crm_trace("Destroying peer cache with %d members",
592 g_hash_table_size(pcmk__peer_cache));
593 g_hash_table_destroy(pcmk__peer_cache);
594 pcmk__peer_cache = NULL;
595 }
596
597 if (pcmk__remote_peer_cache != NULL) {
598 crm_trace("Destroying remote peer cache with %d members",
599 pcmk__cluster_num_remote_nodes());
600 g_hash_table_destroy(pcmk__remote_peer_cache);
601 pcmk__remote_peer_cache = NULL;
602 }
603
604 if (cluster_node_cib_cache != NULL) {
605 crm_trace("Destroying configured cluster node cache with %d members",
606 g_hash_table_size(cluster_node_cib_cache));
607 g_hash_table_destroy(cluster_node_cib_cache);
608 cluster_node_cib_cache = NULL;
609 }
610 }
611
/* Optional client callback invoked when a cached node changes; installed with
 * pcmk__cluster_set_status_callback(). Within this file it is called with:
 * - pcmk__node_update_name and NULL data after a node's name is updated
 * - pcmk__node_update_processes and a pointer to the previous process flags
 * - pcmk__node_update_state and the previous state string (possibly NULL)
 */
612 static void (*peer_status_callback)(enum pcmk__node_update,
613 pcmk__node_status_t *,
614 const void *) = NULL;
615
616
617
618
619
620
621
622
623
624
625 void
626 pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
627 pcmk__node_status_t *,
628 const void *))
629 {
630
631 peer_status_callback = dispatch;
632 }
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648 void
649 pcmk__cluster_set_autoreap(bool enable)
650 {
651 autoreap = enable;
652 }
653
654 static void
655 dump_peer_hash(int level, const char *caller)
656 {
657 GHashTableIter iter;
658 const char *id = NULL;
659 pcmk__node_status_t *node = NULL;
660
661 g_hash_table_iter_init(&iter, pcmk__peer_cache);
662 while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
663 do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s",
664 caller, node->cluster_layer_id, node->name, node, id);
665 }
666 }
667
668 static gboolean
669 hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
670 {
671 return value == user_data;
672 }
673
674
675
676
677
678
679
680
681
682
683
684
685 static pcmk__node_status_t *
686 search_cluster_member_cache(unsigned int id, const char *uname,
687 const char *uuid)
688 {
689 GHashTableIter iter;
690 pcmk__node_status_t *node = NULL;
691 pcmk__node_status_t *by_id = NULL;
692 pcmk__node_status_t *by_name = NULL;
693
694 pcmk__assert((id > 0) || (uname != NULL));
695
696 pcmk__cluster_init_node_caches();
697
698 if (uname != NULL) {
699 g_hash_table_iter_init(&iter, pcmk__peer_cache);
700 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
701 if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
702 crm_trace("Name match: %s", node->name);
703 by_name = node;
704 break;
705 }
706 }
707 }
708
709 if (id > 0) {
710 g_hash_table_iter_init(&iter, pcmk__peer_cache);
711 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
712 if (node->cluster_layer_id == id) {
713 crm_trace("ID match: %" PRIu32, node->cluster_layer_id);
714 by_id = node;
715 break;
716 }
717 }
718
719 } else if (uuid != NULL) {
720 g_hash_table_iter_init(&iter, pcmk__peer_cache);
721 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
722 const char *this_xml_id = pcmk__cluster_get_xml_id(node);
723
724 if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) {
725 crm_trace("Found cluster node cache entry by XML ID %s",
726 this_xml_id);
727 by_id = node;
728 break;
729 }
730 }
731 }
732
733 node = by_id;
734 if(by_id == by_name) {
735
736 crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
737
738 } else if(by_id == NULL && by_name) {
739 crm_trace("Only one: %p for %u/%s", by_name, id, uname);
740
741 if (id && by_name->cluster_layer_id) {
742 dump_peer_hash(LOG_WARNING, __func__);
743 crm_crit("Nodes %u and %" PRIu32 " share the same name '%s'",
744 id, by_name->cluster_layer_id, uname);
745 node = NULL;
746
747 } else {
748 node = by_name;
749 }
750
751 } else if(by_name == NULL && by_id) {
752 crm_trace("Only one: %p for %u/%s", by_id, id, uname);
753
754 if ((uname != NULL) && (by_id->name != NULL)) {
755 dump_peer_hash(LOG_WARNING, __func__);
756 crm_crit("Nodes '%s' and '%s' share the same cluster nodeid %u: "
757 "assuming '%s' is correct",
758 uname, by_id->name, id, uname);
759 }
760
761 } else if ((uname != NULL) && (by_id->name != NULL)) {
762 if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
763 crm_notice("Node '%s' has changed its cluster layer ID "
764 "from %" PRIu32 " to %" PRIu32,
765 by_id->name, by_name->cluster_layer_id,
766 by_id->cluster_layer_id);
767 g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
768 by_name);
769
770 } else {
771 crm_warn("Nodes '%s' and '%s' share the same cluster nodeid: %u %s",
772 by_id->name, by_name->name, id, uname);
773 dump_peer_hash(LOG_INFO, __func__);
774 crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
775 TRUE);
776 }
777
778 } else if ((id > 0) && (by_name->cluster_layer_id > 0)) {
779 crm_warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: '%s'",
780 by_id->cluster_layer_id, by_name->cluster_layer_id, uname);
781
782 } else {
783
784
785
786
787
788
789 dump_peer_hash(LOG_DEBUG, __func__);
790
791 crm_info("Merging %p into %p", by_name, by_id);
792 g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data,
793 by_name);
794 }
795
796 return node;
797 }
798
799
800
801
802
803
804
805
806
807
808
809
810 pcmk__node_status_t *
811 pcmk__search_node_caches(unsigned int id, const char *uname,
812 const char *xml_id, uint32_t flags)
813 {
814 pcmk__node_status_t *node = NULL;
815
816 pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL));
817
818 pcmk__cluster_init_node_caches();
819
820 if (pcmk_is_set(flags, pcmk__node_search_remote)) {
821 if (uname != NULL) {
822 node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
823 } else if (xml_id != NULL) {
824 node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id);
825 }
826 }
827
828 if ((node == NULL)
829 && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
830
831 node = search_cluster_member_cache(id, uname, xml_id);
832 }
833
834 if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
835 if (xml_id != NULL) {
836 node = find_cib_cluster_node(xml_id, uname);
837 } else {
838
839 char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
840
841 node = find_cib_cluster_node(id_str, uname);
842 free(id_str);
843 }
844 }
845
846 return node;
847 }
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863 void
864 pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
865 {
866 char *node_name_copy = NULL;
867
868 if ((node_name == NULL) && (node_id == 0U)) {
869 return;
870 }
871
872
873 if ((node_name != NULL)
874 && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) {
875
876
877
878 node_name_copy = pcmk__str_copy(node_name);
879 node_name = node_name_copy;
880
881 crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
882 g_hash_table_remove(pcmk__remote_peer_cache, node_name);
883 }
884
885 pcmk__cluster_forget_cluster_node(node_id, node_name);
886 free(node_name_copy);
887 }
888
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Remove inactive peer cache entries whose name conflicts with a node
 *
 * An entry conflicts if it has a nonzero cluster layer ID different from the
 * given node's and the same name (compared case-insensitively). Active entries
 * are never removed. Nothing is done if the node lacks an ID or a name, or if
 * \c pcmk__corosync_has_nodelist() returns false.
 *
 * \param[in] node  Node whose name must not be shared with other entries
 *
 * \return Number of entries removed
 */
static guint
remove_conflicting_peer(pcmk__node_status_t *node)
{
    int n_removed = 0;
    GHashTableIter iter;
    pcmk__node_status_t *other = NULL;

    if ((node->cluster_layer_id == 0) || (node->name == NULL)
        || !pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &other)) {
        if (other->cluster_layer_id == 0) {
            continue;
        }
        if (other->cluster_layer_id == node->cluster_layer_id) {
            continue;
        }
        if (!pcmk__str_eq(other->name, node->name, pcmk__str_casei)) {
            continue;
        }
        if (pcmk__cluster_is_node_active(other)) {
            continue;
        }

        crm_warn("Removing cached offline node %" PRIu32 "/%s which has "
                 "conflicting name with %" PRIu32,
                 other->cluster_layer_id, other->name,
                 node->cluster_layer_id);

        g_hash_table_iter_remove(&iter);
        n_removed++;
    }

    return n_removed;
}
#endif
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946 pcmk__node_status_t *
947 pcmk__get_node(unsigned int id, const char *uname, const char *xml_id,
948 uint32_t flags)
949 {
950 pcmk__node_status_t *node = NULL;
951 char *uname_lookup = NULL;
952
953 pcmk__assert((id > 0) || (uname != NULL));
954
955 pcmk__cluster_init_node_caches();
956
957
958 if (pcmk_is_set(flags, pcmk__node_search_remote)) {
959 node = g_hash_table_lookup(pcmk__remote_peer_cache, uname);
960 if (node != NULL) {
961 return node;
962 }
963 }
964
965 if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
966 return NULL;
967 }
968
969 node = search_cluster_member_cache(id, uname, xml_id);
970
971
972
973 if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) {
974 uname_lookup = pcmk__cluster_node_name(id);
975 }
976
977 if (uname_lookup) {
978 uname = uname_lookup;
979 crm_trace("Inferred a name of '%s' for node %u", uname, id);
980
981
982 if (node == NULL) {
983 node = search_cluster_member_cache(id, uname, xml_id);
984 }
985 }
986
987 if (node == NULL) {
988 char *uniqueid = crm_generate_uuid();
989
990 node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));
991
992 crm_info("Created entry %s/%p for node %s/%u (%d total)",
993 uniqueid, node, uname, id,
994 1 + g_hash_table_size(pcmk__peer_cache));
995 g_hash_table_replace(pcmk__peer_cache, uniqueid, node);
996 }
997
998 if ((id > 0) && (uname != NULL)
999 && ((node->cluster_layer_id == 0) || (node->name == NULL))) {
1000 crm_info("Node %u is now known as %s", id, uname);
1001 }
1002
1003 if ((id > 0) && (node->cluster_layer_id == 0)) {
1004 node->cluster_layer_id = id;
1005 }
1006
1007 if ((uname != NULL) && (node->name == NULL)) {
1008 update_peer_uname(node, uname);
1009 }
1010
1011 if ((xml_id == NULL) && (node->xml_id == NULL)) {
1012 xml_id = pcmk__cluster_get_xml_id(node);
1013 if (xml_id == NULL) {
1014 crm_debug("Cannot obtain an XML ID for node %s[%u] at this time",
1015 node->name, id);
1016 } else {
1017 crm_info("Node %s[%u] has XML ID %s", node->name, id, xml_id);
1018 }
1019 }
1020
1021 free(uname_lookup);
1022
1023 return node;
1024 }
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037 static void
1038 update_peer_uname(pcmk__node_status_t *node, const char *uname)
1039 {
1040 CRM_CHECK(uname != NULL,
1041 crm_err("Bug: can't update node name without name"); return);
1042 CRM_CHECK(node != NULL,
1043 crm_err("Bug: can't update node name to %s without node", uname);
1044 return);
1045
1046 if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) {
1047 crm_debug("Node name '%s' did not change", uname);
1048 return;
1049 }
1050
1051 for (const char *c = uname; *c; ++c) {
1052 if ((*c >= 'A') && (*c <= 'Z')) {
1053 crm_warn("Node names with capitals are discouraged, consider changing '%s'",
1054 uname);
1055 break;
1056 }
1057 }
1058
1059 pcmk__str_update(&node->name, uname);
1060
1061 if (peer_status_callback != NULL) {
1062 peer_status_callback(pcmk__node_update_name, node, NULL);
1063 }
1064
1065 #if SUPPORT_COROSYNC
1066 if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
1067 && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1068
1069 remove_conflicting_peer(node);
1070 }
1071 #endif
1072 }
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082 static inline const char *
1083 proc2text(enum crm_proc_flag proc)
1084 {
1085 const char *text = "unknown";
1086
1087 switch (proc) {
1088 case crm_proc_none:
1089 text = "none";
1090 break;
1091 case crm_proc_cpg:
1092 text = "corosync-cpg";
1093 break;
1094 }
1095 return text;
1096 }
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114 pcmk__node_status_t *
1115 crm_update_peer_proc(const char *source, pcmk__node_status_t *node,
1116 uint32_t flag, const char *status)
1117 {
1118 uint32_t last = 0;
1119 gboolean changed = FALSE;
1120
1121 CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
1122 source, proc2text(flag), status);
1123 return NULL);
1124
1125
1126 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1127 return node;
1128 }
1129
1130 last = node->processes;
1131 if (status == NULL) {
1132 node->processes = flag;
1133 if (node->processes != last) {
1134 changed = TRUE;
1135 }
1136
1137 } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1138 if ((node->processes & flag) != flag) {
1139 node->processes = pcmk__set_flags_as(__func__, __LINE__,
1140 LOG_TRACE, "Peer process",
1141 node->name, node->processes,
1142 flag, "processes");
1143 changed = TRUE;
1144 }
1145
1146 } else if (node->processes & flag) {
1147 node->processes = pcmk__clear_flags_as(__func__, __LINE__,
1148 LOG_TRACE, "Peer process",
1149 node->name, node->processes,
1150 flag, "processes");
1151 changed = TRUE;
1152 }
1153
1154 if (changed) {
1155 if (status == NULL && flag <= crm_proc_none) {
1156 crm_info("%s: Node %s[%" PRIu32 "] - all processes are now offline",
1157 source, node->name, node->cluster_layer_id);
1158 } else {
1159 crm_info("%s: Node %s[%" PRIu32 "] - %s is now %s",
1160 source, node->name, node->cluster_layer_id,
1161 proc2text(flag), status);
1162 }
1163
1164 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1165 node->when_online = time(NULL);
1166
1167 } else {
1168 node->when_online = 0;
1169 }
1170
1171
1172
1173
1174 if (peer_status_callback != NULL) {
1175 peer_status_callback(pcmk__node_update_processes, node, &last);
1176 }
1177
1178
1179
1180
1181 if (pcmk__peer_cache == NULL) {
1182 return NULL;
1183 }
1184
1185 if (autoreap) {
1186 const char *peer_state = NULL;
1187
1188 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
1189 peer_state = PCMK_VALUE_MEMBER;
1190 } else {
1191 peer_state = PCMK__VALUE_LOST;
1192 }
1193 node = pcmk__update_peer_state(__func__, node, peer_state, 0);
1194 }
1195 } else {
1196 crm_trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)",
1197 source, node->name, node->cluster_layer_id, proc2text(flag),
1198 status);
1199 }
1200 return node;
1201 }
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211 void
1212 pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
1213 const char *expected)
1214 {
1215 char *last = NULL;
1216 gboolean changed = FALSE;
1217
1218 CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
1219 return);
1220
1221
1222 if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1223 return;
1224 }
1225
1226 last = node->expected;
1227 if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
1228 node->expected = strdup(expected);
1229 changed = TRUE;
1230 }
1231
1232 if (changed) {
1233 crm_info("%s: Node %s[%" PRIu32 "] - expected state is now %s (was %s)",
1234 source, node->name, node->cluster_layer_id, expected, last);
1235 free(last);
1236 } else {
1237 crm_trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)",
1238 source, node->name, node->cluster_layer_id, expected);
1239 }
1240 }
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258 static pcmk__node_status_t *
1259 update_peer_state_iter(const char *source, pcmk__node_status_t *node,
1260 const char *state, uint64_t membership,
1261 GHashTableIter *iter)
1262 {
1263 gboolean is_member;
1264
1265 CRM_CHECK(node != NULL,
1266 crm_err("Could not set state for unknown host to %s "
1267 QB_XS " source=%s", state, source);
1268 return NULL);
1269
1270 is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none);
1271 if (is_member) {
1272 node->when_lost = 0;
1273 if (membership) {
1274 node->membership_id = membership;
1275 }
1276 }
1277
1278 if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
1279 char *last = node->state;
1280
1281 if (is_member) {
1282 node->when_member = time(NULL);
1283
1284 } else {
1285 node->when_member = 0;
1286 }
1287
1288 node->state = strdup(state);
1289 crm_notice("Node %s state is now %s " QB_XS
1290 " nodeid=%" PRIu32 " previous=%s source=%s",
1291 node->name, state, node->cluster_layer_id,
1292 pcmk__s(last, "unknown"), source);
1293 if (peer_status_callback != NULL) {
1294 peer_status_callback(pcmk__node_update_state, node, last);
1295 }
1296 free(last);
1297
1298 if (autoreap && !is_member
1299 && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
1300
1301
1302
1303
1304 if(iter) {
1305 crm_notice("Purged 1 peer with cluster layer ID %" PRIu32
1306 "and/or name=%s from the membership cache",
1307 node->cluster_layer_id, node->name);
1308 g_hash_table_iter_remove(iter);
1309
1310 } else {
1311 pcmk__cluster_forget_cluster_node(node->cluster_layer_id,
1312 node->name);
1313 }
1314 node = NULL;
1315 }
1316
1317 } else {
1318 crm_trace("Node %s state is unchanged (%s) " QB_XS
1319 " nodeid=%" PRIu32 " source=%s",
1320 node->name, state, node->cluster_layer_id, source);
1321 }
1322 return node;
1323 }
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340 pcmk__node_status_t *
1341 pcmk__update_peer_state(const char *source, pcmk__node_status_t *node,
1342 const char *state, uint64_t membership)
1343 {
1344 return update_peer_state_iter(source, node, state, membership, NULL);
1345 }
1346
1347
1348
1349
1350
1351
1352
1353 void
1354 pcmk__reap_unseen_nodes(uint64_t membership)
1355 {
1356 GHashTableIter iter;
1357 pcmk__node_status_t *node = NULL;
1358
1359 crm_trace("Reaping unseen nodes...");
1360 g_hash_table_iter_init(&iter, pcmk__peer_cache);
1361 while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1362 if (node->membership_id != membership) {
1363 if (node->state) {
1364
1365
1366
1367 update_peer_state_iter(__func__, node, PCMK__VALUE_LOST,
1368 membership, &iter);
1369
1370 } else {
1371 crm_info("State of node %s[%" PRIu32 "] is still unknown",
1372 node->name, node->cluster_layer_id);
1373 }
1374 }
1375 }
1376 }
1377
1378 static pcmk__node_status_t *
1379 find_cib_cluster_node(const char *id, const char *uname)
1380 {
1381 GHashTableIter iter;
1382 pcmk__node_status_t *node = NULL;
1383 pcmk__node_status_t *by_id = NULL;
1384 pcmk__node_status_t *by_name = NULL;
1385
1386 if (uname) {
1387 g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1388 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1389 if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) {
1390 crm_trace("Name match: %s = %p", node->name, node);
1391 by_name = node;
1392 break;
1393 }
1394 }
1395 }
1396
1397 if (id) {
1398 g_hash_table_iter_init(&iter, cluster_node_cib_cache);
1399 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1400 if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node),
1401 pcmk__str_none)) {
1402 crm_trace("ID match: %s= %p", id, node);
1403 by_id = node;
1404 break;
1405 }
1406 }
1407 }
1408
1409 node = by_id;
1410 if (by_id == by_name) {
1411
1412 crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1413
1414 } else if (by_id == NULL && by_name) {
1415 crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1416
1417 if (id) {
1418 node = NULL;
1419
1420 } else {
1421 node = by_name;
1422 }
1423
1424 } else if (by_name == NULL && by_id) {
1425 crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1426
1427 if (uname) {
1428 node = NULL;
1429 }
1430
1431 } else if ((uname != NULL) && (by_id->name != NULL)
1432 && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) {
1433
1434
1435
1436 } else if ((id != NULL) && (by_name->xml_id != NULL)
1437 && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) {
1438
1439
1440 node = by_name;
1441
1442 } else {
1443 node = NULL;
1444 }
1445
1446 if (node == NULL) {
1447 crm_debug("Couldn't find node%s%s%s%s",
1448 id? " " : "",
1449 id? id : "",
1450 uname? " with name " : "",
1451 uname? uname : "");
1452 }
1453
1454 return node;
1455 }
1456
1457 static void
1458 cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1459 {
1460 const char *id = crm_element_value(xml_node, PCMK_XA_ID);
1461 const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
1462 pcmk__node_status_t * node = NULL;
1463
1464 CRM_CHECK(id != NULL && uname !=NULL, return);
1465 node = find_cib_cluster_node(id, uname);
1466
1467 if (node == NULL) {
1468 char *uniqueid = crm_generate_uuid();
1469
1470 node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t));
1471
1472 node->name = pcmk__str_copy(uname);
1473 node->xml_id = pcmk__str_copy(id);
1474
1475 g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
1476
1477 } else if (pcmk_is_set(node->flags, pcmk__node_status_dirty)) {
1478 pcmk__str_update(&node->name, uname);
1479
1480
1481 clear_peer_flags(node, pcmk__node_status_dirty);
1482 }
1483
1484 }
1485
1486 static void
1487 refresh_cluster_node_cib_cache(xmlNode *cib)
1488 {
1489 pcmk__cluster_init_node_caches();
1490
1491 g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
1492
1493 pcmk__xpath_foreach_result(cib->doc, PCMK__XP_MEMBER_NODE_CONFIG,
1494 cluster_node_cib_cache_refresh_helper, NULL);
1495
1496
1497 g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
1498 }
1499
1500 void
1501 pcmk__refresh_node_caches_from_cib(xmlNode *cib)
1502 {
1503 refresh_remote_nodes(cib);
1504 refresh_cluster_node_cib_cache(cib);
1505 }
1506
1507
1508
1509
1510 #include <crm/cluster/compat.h>
1511
/*!
 * \brief Create the node caches if they do not exist yet
 *
 * Thin wrapper around \c pcmk__cluster_init_node_caches(). It is declared
 * after the include of <crm/cluster/compat.h>, so it is presumably kept only
 * for backward compatibility -- confirm before relying on it in new code.
 */
void
crm_peer_init(void)
{
    // All the work happens in the internal initializer
    pcmk__cluster_init_node_caches();
}
1517
1518
1519