This source file includes the following definitions.
- pcmk__corosync_uuid
- node_name_is_valid
- pcmk__corosync_name
- pcmk__corosync_disconnect
- quorum_dispatch_cb
- quorum_notification_cb
- pcmk__corosync_quorum_connect
- pcmk__corosync_connect
- pcmk__corosync_detect
- crm_is_corosync_peer_active
- pcmk__corosync_add_nodes
- pcmk__corosync_cluster_name
- pcmk__corosync_has_nodelist
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/socket.h>
13 #include <netinet/in.h>
14 #include <arpa/inet.h>
15 #include <netdb.h>
16 #include <inttypes.h>
17
18 #include <bzlib.h>
19
20 #include <crm/common/ipc.h>
21 #include <crm/cluster/internal.h>
22 #include <crm/common/mainloop.h>
23 #include <sys/utsname.h>
24
25 #include <qb/qbipcc.h>
26 #include <qb/qbutil.h>
27
28 #include <corosync/corodefs.h>
29 #include <corosync/corotypes.h>
30 #include <corosync/hdb.h>
31 #include <corosync/cfg.h>
32 #include <corosync/cmap.h>
33 #include <corosync/quorum.h>
34
35 #include <crm/msg_xml.h>
36
37 #include <crm/common/ipc_internal.h>
38 #include "crmcluster_private.h"
39
40 static quorum_handle_t pcmk_quorum_handle = 0;
41
42 static gboolean (*quorum_app_callback)(unsigned long long seq,
43 gboolean quorate) = NULL;
44
45
46
47
48
49
50
51
52
53
54 char *
55 pcmk__corosync_uuid(crm_node_t *node)
56 {
57 if ((node != NULL) && is_corosync_cluster()) {
58 if (node->id > 0) {
59 return crm_strdup_printf("%u", node->id);
60 } else {
61 crm_info("Node %s is not yet known by corosync", node->uname);
62 }
63 }
64 return NULL;
65 }
66
/*!
 * \internal
 * \brief Check whether a CMAP value is usable as a node name
 *
 * A value is rejected if it is NULL, if it parses as four dot-separated
 * integers (treated as an IPv4 address), or if it contains a colon (treated
 * as an IPv6 address).
 *
 * \param[in] key   CMAP key the value came from (used for logging only)
 * \param[in] name  Value to check (may be NULL)
 *
 * \return true if \p name may be used as a node name, otherwise false
 */
static bool
node_name_is_valid(const char *key, const char *name)
{
    int part = 0;   // Scratch target for sscanf(); only the match count matters
    bool valid = false;

    if (name == NULL) {
        crm_trace("%s is empty", key);

    } else if (sscanf(name, "%d.%d.%d.%d", &part, &part, &part, &part) == 4) {
        crm_trace("%s contains an IPv4 address (%s), ignoring", key, name);

    } else if (strchr(name, ':') != NULL) {
        crm_trace("%s contains an IPv6 address (%s), ignoring", key, name);

    } else {
        crm_trace("'%s: %s' is valid", key, name);
        valid = true;
    }
    return valid;
}
87
88
89
90
91
92
93
94
95
96
97
98
99
100 char *
101 pcmk__corosync_name(uint64_t cmap_handle, uint32_t nodeid)
102 {
103
104
105 int lpc = 0;
106 cs_error_t rc = CS_OK;
107 int retries = 0;
108 char *name = NULL;
109 cmap_handle_t local_handle = 0;
110 int fd = -1;
111 uid_t found_uid = 0;
112 gid_t found_gid = 0;
113 pid_t found_pid = 0;
114 int rv;
115
116 if (nodeid == 0) {
117 nodeid = get_local_nodeid(0);
118 }
119
120 if (cmap_handle == 0 && local_handle == 0) {
121 retries = 0;
122 crm_trace("Initializing CMAP connection");
123 do {
124 rc = cmap_initialize(&local_handle);
125 if (rc != CS_OK) {
126 retries++;
127 crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
128 retries);
129 sleep(retries);
130 }
131
132 } while (retries < 5 && rc != CS_OK);
133
134 if (rc != CS_OK) {
135 crm_warn("Could not connect to Cluster Configuration Database API, error %s",
136 cs_strerror(rc));
137 local_handle = 0;
138 }
139 }
140
141 if (cmap_handle == 0) {
142 cmap_handle = local_handle;
143
144 rc = cmap_fd_get(cmap_handle, &fd);
145 if (rc != CS_OK) {
146 crm_err("Could not obtain the CMAP API connection: %s (%d)",
147 cs_strerror(rc), rc);
148 goto bail;
149 }
150
151
152 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
153 &found_uid, &found_gid))) {
154 crm_err("CMAP provider is not authentic:"
155 " process %lld (uid: %lld, gid: %lld)",
156 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
157 (long long) found_uid, (long long) found_gid);
158 goto bail;
159 } else if (rv < 0) {
160 crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
161 strerror(-rv), -rv);
162 goto bail;
163 }
164 }
165
166 while (name == NULL && cmap_handle != 0) {
167 uint32_t id = 0;
168 char *key = NULL;
169
170 key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
171 rc = cmap_get_uint32(cmap_handle, key, &id);
172 crm_trace("Checking %u vs %u from %s", nodeid, id, key);
173 free(key);
174
175 if (rc != CS_OK) {
176 break;
177 }
178
179 if (nodeid == id) {
180 crm_trace("Searching for node name for %u in nodelist.node.%d %s",
181 nodeid, lpc, crm_str(name));
182 if (name == NULL) {
183 key = crm_strdup_printf("nodelist.node.%d.name", lpc);
184 cmap_get_string(cmap_handle, key, &name);
185 crm_trace("%s = %s", key, crm_str(name));
186 free(key);
187 }
188 if (name == NULL) {
189 key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
190 cmap_get_string(cmap_handle, key, &name);
191 crm_trace("%s = %s", key, crm_str(name));
192
193 if (!node_name_is_valid(key, name)) {
194 free(name);
195 name = NULL;
196 }
197 free(key);
198 }
199 break;
200 }
201
202 lpc++;
203 }
204
205 bail:
206 if(local_handle) {
207 cmap_finalize(local_handle);
208 }
209
210 if (name == NULL) {
211 crm_info("Unable to get node name for nodeid %u", nodeid);
212 }
213 return name;
214 }
215
216
217
218
219
220
221
222 void
223 pcmk__corosync_disconnect(crm_cluster_t *cluster)
224 {
225 cluster_disconnect_cpg(cluster);
226 if (pcmk_quorum_handle) {
227 quorum_finalize(pcmk_quorum_handle);
228 pcmk_quorum_handle = 0;
229 }
230 crm_notice("Disconnected from Corosync");
231 }
232
233
234
235
236
237
238
239
240
241 static int
242 quorum_dispatch_cb(gpointer user_data)
243 {
244 int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
245
246 if (rc < 0) {
247 crm_err("Connection to the Quorum API failed: %d", rc);
248 quorum_finalize(pcmk_quorum_handle);
249 pcmk_quorum_handle = 0;
250 return -1;
251 }
252 return 0;
253 }
254
255
256
257
258
259
260
261
262
263
264
265 static void
266 quorum_notification_cb(quorum_handle_t handle, uint32_t quorate,
267 uint64_t ring_id, uint32_t view_list_entries,
268 uint32_t *view_list)
269 {
270 int i;
271 GHashTableIter iter;
272 crm_node_t *node = NULL;
273 static gboolean init_phase = TRUE;
274
275 if (quorate != crm_have_quorum) {
276 if (quorate) {
277 crm_notice("Quorum acquired " CRM_XS " membership=%" U64T " members=%lu",
278 ring_id, (long unsigned int)view_list_entries);
279 } else {
280 crm_warn("Quorum lost " CRM_XS " membership=%" U64T " members=%lu",
281 ring_id, (long unsigned int)view_list_entries);
282 }
283 crm_have_quorum = quorate;
284
285 } else {
286 crm_info("Quorum %s " CRM_XS " membership=%" U64T " members=%lu",
287 (quorate? "retained" : "still lost"), ring_id,
288 (long unsigned int)view_list_entries);
289 }
290
291 if (view_list_entries == 0 && init_phase) {
292 crm_info("Corosync membership is still forming, ignoring");
293 return;
294 }
295
296 init_phase = FALSE;
297
298
299
300 g_hash_table_iter_init(&iter, crm_peer_cache);
301 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
302 node->last_seen = 0;
303 }
304
305
306 for (i = 0; i < view_list_entries; i++) {
307 uint32_t id = view_list[i];
308
309 crm_debug("Member[%d] %u ", i, id);
310
311
312 node = crm_get_peer(id, NULL);
313 if (node->uname == NULL) {
314 char *name = pcmk__corosync_name(0, id);
315
316 crm_info("Obtaining name for new node %u", id);
317 node = crm_get_peer(id, name);
318 free(name);
319 }
320
321
322 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
323 }
324
325
326 pcmk__reap_unseen_nodes(ring_id);
327
328 if (quorum_app_callback) {
329 quorum_app_callback(ring_id, quorate);
330 }
331 }
332
333 quorum_callbacks_t quorum_callbacks = {
334 .quorum_notify_fn = quorum_notification_cb,
335 };
336
337
338
339
340
341
342
343
344 void
345 pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
346 gboolean),
347 void (*destroy)(gpointer))
348 {
349 cs_error_t rc;
350 int fd = 0;
351 int quorate = 0;
352 uint32_t quorum_type = 0;
353 struct mainloop_fd_callbacks quorum_fd_callbacks;
354 uid_t found_uid = 0;
355 gid_t found_gid = 0;
356 pid_t found_pid = 0;
357 int rv;
358
359 quorum_fd_callbacks.dispatch = quorum_dispatch_cb;
360 quorum_fd_callbacks.destroy = destroy;
361
362 crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
363
364 rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type);
365 if (rc != CS_OK) {
366 crm_err("Could not connect to the Quorum API: %s (%d)",
367 cs_strerror(rc), rc);
368 goto bail;
369
370 } else if (quorum_type != QUORUM_SET) {
371 crm_err("Corosync quorum is not configured");
372 goto bail;
373 }
374
375 rc = quorum_fd_get(pcmk_quorum_handle, &fd);
376 if (rc != CS_OK) {
377 crm_err("Could not obtain the Quorum API connection: %s (%d)",
378 strerror(rc), rc);
379 goto bail;
380 }
381
382
383 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
384 &found_uid, &found_gid))) {
385 crm_err("Quorum provider is not authentic:"
386 " process %lld (uid: %lld, gid: %lld)",
387 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
388 (long long) found_uid, (long long) found_gid);
389 rc = CS_ERR_ACCESS;
390 goto bail;
391 } else if (rv < 0) {
392 crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
393 strerror(-rv), -rv);
394 rc = CS_ERR_ACCESS;
395 goto bail;
396 }
397
398 rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
399 if (rc != CS_OK) {
400 crm_err("Could not obtain the current Quorum API state: %d", rc);
401 goto bail;
402 }
403
404 if (quorate) {
405 crm_notice("Quorum acquired");
406 } else {
407 crm_warn("Quorum lost");
408 }
409 quorum_app_callback = dispatch;
410 crm_have_quorum = quorate;
411
412 rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
413 if (rc != CS_OK) {
414 crm_err("Could not setup Quorum API notifications: %d", rc);
415 goto bail;
416 }
417
418 mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
419
420 pcmk__corosync_add_nodes(NULL);
421
422 bail:
423 if (rc != CS_OK) {
424 quorum_finalize(pcmk_quorum_handle);
425 }
426 }
427
428
429
430
431
432
433
434 gboolean
435 pcmk__corosync_connect(crm_cluster_t *cluster)
436 {
437 crm_node_t *peer = NULL;
438 enum cluster_type_e stack = get_cluster_type();
439
440 crm_peer_init();
441
442 if (stack != pcmk_cluster_corosync) {
443 crm_err("Invalid cluster type: %s " CRM_XS " stack=%d",
444 name_for_cluster_type(stack), stack);
445 return FALSE;
446 }
447
448 if (!cluster_connect_cpg(cluster)) {
449
450 return FALSE;
451 }
452 crm_info("Connection to %s established", name_for_cluster_type(stack));
453
454 cluster->nodeid = get_local_nodeid(0);
455 if (cluster->nodeid == 0) {
456 crm_err("Could not determine local node ID");
457 return FALSE;
458 }
459
460 cluster->uname = get_node_name(0);
461 if (cluster->uname == NULL) {
462 crm_err("Could not determine local node name");
463 return FALSE;
464 }
465
466
467 peer = crm_get_peer(cluster->nodeid, cluster->uname);
468 cluster->uuid = pcmk__corosync_uuid(peer);
469
470 return TRUE;
471 }
472
473
474
475
476
477
478
/*!
 * \internal
 * \brief Check whether a Corosync cluster is active
 *
 * Detection is done by attempting to open a CMAP connection.
 *
 * \return pcmk_cluster_corosync if the CMAP connection succeeded or was
 *         refused with CS_ERR_SECURITY, otherwise pcmk_cluster_unknown
 */
enum cluster_type_e
pcmk__corosync_detect(void)
{
    cmap_handle_t handle;
    int rc = cmap_initialize(&handle);

    if (rc == CS_OK) {
        // The connection was only needed as a probe
        cmap_finalize(handle);
        return pcmk_cluster_corosync;
    }

    if (rc == CS_ERR_SECURITY) {
        // A permission error is still treated as Corosync being present
        crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc);
        return pcmk_cluster_corosync;
    }

    crm_info("Failed to initialize the cmap API: %s (%d)",
             pcmk__cs_err_str(rc), rc);
    return pcmk_cluster_unknown;
}
506
507
508
509
510
511
512
513
514 gboolean
515 crm_is_corosync_peer_active(const crm_node_t *node)
516 {
517 if (node == NULL) {
518 crm_trace("Corosync peer inactive: NULL");
519 return FALSE;
520
521 } else if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
522 crm_trace("Corosync peer %s inactive: state=%s",
523 node->uname, node->state);
524 return FALSE;
525
526 } else if (!pcmk_is_set(node->processes, crm_proc_cpg)) {
527 crm_trace("Corosync peer %s inactive: processes=%.16x",
528 node->uname, node->processes);
529 return FALSE;
530 }
531 return TRUE;
532 }
533
534
535
536
537
538
539
540
541
542 bool
543 pcmk__corosync_add_nodes(xmlNode *xml_parent)
544 {
545 int lpc = 0;
546 cs_error_t rc = CS_OK;
547 int retries = 0;
548 bool any = false;
549 cmap_handle_t cmap_handle;
550 int fd = -1;
551 uid_t found_uid = 0;
552 gid_t found_gid = 0;
553 pid_t found_pid = 0;
554 int rv;
555
556 do {
557 rc = cmap_initialize(&cmap_handle);
558 if (rc != CS_OK) {
559 retries++;
560 crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
561 retries);
562 sleep(retries);
563 }
564
565 } while (retries < 5 && rc != CS_OK);
566
567 if (rc != CS_OK) {
568 crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
569 return false;
570 }
571
572 rc = cmap_fd_get(cmap_handle, &fd);
573 if (rc != CS_OK) {
574 crm_err("Could not obtain the CMAP API connection: %s (%d)",
575 cs_strerror(rc), rc);
576 goto bail;
577 }
578
579
580 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
581 &found_uid, &found_gid))) {
582 crm_err("CMAP provider is not authentic:"
583 " process %lld (uid: %lld, gid: %lld)",
584 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
585 (long long) found_uid, (long long) found_gid);
586 goto bail;
587 } else if (rv < 0) {
588 crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
589 strerror(-rv), -rv);
590 goto bail;
591 }
592
593 crm_peer_init();
594 crm_trace("Initializing corosync nodelist");
595 for (lpc = 0; TRUE; lpc++) {
596 uint32_t nodeid = 0;
597 char *name = NULL;
598 char *key = NULL;
599
600 key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
601 rc = cmap_get_uint32(cmap_handle, key, &nodeid);
602 free(key);
603
604 if (rc != CS_OK) {
605 break;
606 }
607
608 name = pcmk__corosync_name(cmap_handle, nodeid);
609 if (name != NULL) {
610 GHashTableIter iter;
611 crm_node_t *node = NULL;
612
613 g_hash_table_iter_init(&iter, crm_peer_cache);
614 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
615 if(node && node->uname && strcasecmp(node->uname, name) == 0) {
616 if (node->id && node->id != nodeid) {
617 crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
618 nodeid, name);
619 crm_exit(CRM_EX_FATAL);
620 }
621 }
622 }
623 }
624
625 if (nodeid > 0 || name != NULL) {
626 crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
627 crm_get_peer(nodeid, name);
628 }
629
630 if (nodeid > 0 && name != NULL) {
631 any = true;
632
633 if (xml_parent) {
634 xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);
635
636 crm_xml_set_id(node, "%u", nodeid);
637 crm_xml_add(node, XML_ATTR_UNAME, name);
638 }
639 }
640
641 free(name);
642 }
643 bail:
644 cmap_finalize(cmap_handle);
645 return any;
646 }
647
648
649
650
651
652
653
654 char *
655 pcmk__corosync_cluster_name(void)
656 {
657 cmap_handle_t handle;
658 char *cluster_name = NULL;
659 cs_error_t rc = CS_OK;
660 int fd = -1;
661 uid_t found_uid = 0;
662 gid_t found_gid = 0;
663 pid_t found_pid = 0;
664 int rv;
665
666 rc = cmap_initialize(&handle);
667 if (rc != CS_OK) {
668 crm_info("Failed to initialize the cmap API: %s (%d)",
669 cs_strerror(rc), rc);
670 return NULL;
671 }
672
673 rc = cmap_fd_get(handle, &fd);
674 if (rc != CS_OK) {
675 crm_err("Could not obtain the CMAP API connection: %s (%d)",
676 cs_strerror(rc), rc);
677 goto bail;
678 }
679
680
681 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
682 &found_uid, &found_gid))) {
683 crm_err("CMAP provider is not authentic:"
684 " process %lld (uid: %lld, gid: %lld)",
685 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
686 (long long) found_uid, (long long) found_gid);
687 goto bail;
688 } else if (rv < 0) {
689 crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
690 strerror(-rv), -rv);
691 goto bail;
692 }
693
694 rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
695 if (rc != CS_OK) {
696 crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
697
698 } else {
699 crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
700 }
701
702 bail:
703 cmap_finalize(handle);
704 return cluster_name;
705 }
706
707
708
709
710
711
712
713 bool
714 pcmk__corosync_has_nodelist(void)
715 {
716 cs_error_t cs_rc = CS_OK;
717 int retries = 0;
718 cmap_handle_t cmap_handle;
719 cmap_iter_handle_t iter_handle;
720 char key_name[CMAP_KEYNAME_MAXLEN + 1];
721 int fd = -1;
722 uid_t found_uid = 0;
723 gid_t found_gid = 0;
724 pid_t found_pid = 0;
725 int rc = pcmk_ok;
726
727 static bool got_result = false;
728 static bool result = false;
729
730 if (got_result) {
731 return result;
732 }
733
734
735 do {
736 cs_rc = cmap_initialize(&cmap_handle);
737 if (cs_rc != CS_OK) {
738 retries++;
739 crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)",
740 cs_strerror(cs_rc), cs_rc, retries);
741 sleep(retries);
742 }
743 } while ((retries < 5) && (cs_rc != CS_OK));
744 if (cs_rc != CS_OK) {
745 crm_warn("Assuming Corosync does not have node list: "
746 "CMAP connection failed (%s) " CRM_XS " rc=%d",
747 cs_strerror(cs_rc), cs_rc);
748 return false;
749 }
750
751
752 cs_rc = cmap_fd_get(cmap_handle, &fd);
753 if (cs_rc != CS_OK) {
754 crm_warn("Assuming Corosync does not have node list: "
755 "CMAP unusable (%s) " CRM_XS " rc=%d",
756 cs_strerror(cs_rc), cs_rc);
757 goto bail;
758 }
759
760
761 rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0,
762 &found_pid, &found_uid, &found_gid);
763 if (rc == 0) {
764 crm_warn("Assuming Corosync does not have node list: "
765 "CMAP provider is inauthentic "
766 CRM_XS " pid=%lld uid=%lld gid=%lld",
767 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
768 (long long) found_uid, (long long) found_gid);
769 goto bail;
770 } else if (rc < 0) {
771 crm_warn("Assuming Corosync does not have node list: "
772 "Could not verify CMAP authenticity (%s) " CRM_XS " rc=%d",
773 pcmk_strerror(rc), rc);
774 goto bail;
775 }
776
777
778 cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle);
779 if (cs_rc != CS_OK) {
780 crm_warn("Assuming Corosync does not have node list: "
781 "CMAP not readable (%s) " CRM_XS " rc=%d",
782 cs_strerror(cs_rc), cs_rc);
783 goto bail;
784 }
785
786 cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL);
787 if (cs_rc == CS_OK) {
788 result = true;
789 }
790
791 cmap_iter_finalize(cmap_handle, iter_handle);
792 got_result = true;
793 crm_debug("Corosync %s node list", (result? "has" : "does not have"));
794
795 bail:
796 cmap_finalize(cmap_handle);
797 return result;
798 }