This source file includes the following definitions.
- pcmk__corosync_uuid
- node_name_is_valid
- pcmk__corosync_name
- pcmk__corosync_disconnect
- quorum_dispatch_cb
- quorum_notification_cb
- pcmk__corosync_quorum_connect
- pcmk__corosync_connect
- pcmk__corosync_detect
- crm_is_corosync_peer_active
- pcmk__corosync_add_nodes
- pcmk__corosync_cluster_name
- pcmk__corosync_has_nodelist
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/socket.h>
13 #include <netinet/in.h>
14 #include <arpa/inet.h>
15 #include <netdb.h>
16 #include <inttypes.h>
17
18 #include <bzlib.h>
19
20 #include <crm/common/ipc.h>
21 #include <crm/cluster/internal.h>
22 #include <crm/common/mainloop.h>
23 #include <sys/utsname.h>
24
25 #include <qb/qbipcc.h>
26 #include <qb/qbutil.h>
27
28 #include <corosync/corodefs.h>
29 #include <corosync/corotypes.h>
30 #include <corosync/hdb.h>
31 #include <corosync/cfg.h>
32 #include <corosync/cmap.h>
33 #include <corosync/quorum.h>
34
35 #include <crm/msg_xml.h>
36
37 #include <crm/common/ipc_internal.h>
38 #include "crmcluster_private.h"
39
/* Handle for the Corosync quorum API connection; the code in this file
 * treats 0 as "not connected" and resets it to 0 after finalizing.
 */
40 static quorum_handle_t pcmk_quorum_handle = 0;
41
/* Caller-supplied hook, installed by pcmk__corosync_quorum_connect() and
 * invoked by quorum_notification_cb() after a membership update has been
 * fully processed (NULL means no hook is installed).
 */
42 static gboolean (*quorum_app_callback)(unsigned long long seq,
43 gboolean quorate) = NULL;
44
45
46
47
48
49
50
51
52
53
54 char *
55 pcmk__corosync_uuid(crm_node_t *node)
56 {
57 if ((node != NULL) && is_corosync_cluster()) {
58 if (node->id > 0) {
59 return crm_strdup_printf("%u", node->id);
60 } else {
61 crm_info("Node %s is not yet known by Corosync", node->uname);
62 }
63 }
64 return NULL;
65 }
66
/*!
 * \internal
 * \brief Check whether a CMAP value is usable as a node name
 *
 * A value is rejected if it is NULL, if it starts with four dot-separated
 * integers (treated as an IPv4 address), or if it contains a colon (treated
 * as an IPv6 address).
 *
 * \param[in] key   CMAP key the value came from (used for logging only)
 * \param[in] name  Candidate node name (may be NULL)
 *
 * \return true if \p name may be used as a node name, otherwise false
 */
static bool
node_name_is_valid(const char *key, const char *name)
{
    int o1 = 0;
    int o2 = 0;
    int o3 = 0;
    int o4 = 0;

    if (name == NULL) {
        crm_trace("%s is empty", key);
        return false;
    }

    /* Only the number of converted fields matters, not their values */
    if (sscanf(name, "%d.%d.%d.%d", &o1, &o2, &o3, &o4) == 4) {
        crm_trace("%s contains an IPv4 address (%s), ignoring", key, name);
        return false;
    }

    if (strchr(name, ':') != NULL) {
        crm_trace("%s contains an IPv6 address (%s), ignoring", key, name);
        return false;
    }

    crm_trace("'%s: %s' is valid", key, name);
    return true;
}
87
88
89
90
91
92
93
94
95
96
97
98
99
100 char *
101 pcmk__corosync_name(uint64_t cmap_handle, uint32_t nodeid)
102 {
103
104
105 int lpc = 0;
106 cs_error_t rc = CS_OK;
107 int retries = 0;
108 char *name = NULL;
109 cmap_handle_t local_handle = 0;
110 int fd = -1;
111 uid_t found_uid = 0;
112 gid_t found_gid = 0;
113 pid_t found_pid = 0;
114 int rv;
115
116 if (nodeid == 0) {
117 nodeid = get_local_nodeid(0);
118 }
119
120 if (cmap_handle == 0 && local_handle == 0) {
121 retries = 0;
122 crm_trace("Initializing CMAP connection");
123 do {
124 rc = pcmk__init_cmap(&local_handle);
125 if (rc != CS_OK) {
126 retries++;
127 crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
128 retries);
129 sleep(retries);
130 }
131
132 } while (retries < 5 && rc != CS_OK);
133
134 if (rc != CS_OK) {
135 crm_warn("Could not connect to Cluster Configuration Database API, error %s",
136 cs_strerror(rc));
137 local_handle = 0;
138 }
139 }
140
141 if (cmap_handle == 0) {
142 cmap_handle = local_handle;
143
144 rc = cmap_fd_get(cmap_handle, &fd);
145 if (rc != CS_OK) {
146 crm_err("Could not obtain the CMAP API connection: %s (%d)",
147 cs_strerror(rc), rc);
148 goto bail;
149 }
150
151
152 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
153 &found_uid, &found_gid))) {
154 crm_err("CMAP provider is not authentic:"
155 " process %lld (uid: %lld, gid: %lld)",
156 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
157 (long long) found_uid, (long long) found_gid);
158 goto bail;
159 } else if (rv < 0) {
160 crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
161 strerror(-rv), -rv);
162 goto bail;
163 }
164 }
165
166 while (name == NULL && cmap_handle != 0) {
167 uint32_t id = 0;
168 char *key = NULL;
169
170 key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
171 rc = cmap_get_uint32(cmap_handle, key, &id);
172 crm_trace("Checking %u vs %u from %s", nodeid, id, key);
173 free(key);
174
175 if (rc != CS_OK) {
176 break;
177 }
178
179 if (nodeid == id) {
180 crm_trace("Searching for node name for %u in nodelist.node.%d %s",
181 nodeid, lpc, pcmk__s(name, "<null>"));
182 if (name == NULL) {
183 key = crm_strdup_printf("nodelist.node.%d.name", lpc);
184 cmap_get_string(cmap_handle, key, &name);
185 crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
186 free(key);
187 }
188 if (name == NULL) {
189 key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
190 cmap_get_string(cmap_handle, key, &name);
191 crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
192
193 if (!node_name_is_valid(key, name)) {
194 free(name);
195 name = NULL;
196 }
197 free(key);
198 }
199 break;
200 }
201
202 lpc++;
203 }
204
205 bail:
206 if(local_handle) {
207 cmap_finalize(local_handle);
208 }
209
210 if (name == NULL) {
211 crm_info("Unable to get node name for nodeid %u", nodeid);
212 }
213 return name;
214 }
215
216
217
218
219
220
221
222 void
223 pcmk__corosync_disconnect(crm_cluster_t *cluster)
224 {
225 cluster_disconnect_cpg(cluster);
226 if (pcmk_quorum_handle) {
227 quorum_finalize(pcmk_quorum_handle);
228 pcmk_quorum_handle = 0;
229 }
230 crm_notice("Disconnected from Corosync");
231 }
232
233
234
235
236
237
238
239
240
241 static int
242 quorum_dispatch_cb(gpointer user_data)
243 {
244 int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
245
246 if (rc < 0) {
247 crm_err("Connection to the Quorum API failed: %d", rc);
248 quorum_finalize(pcmk_quorum_handle);
249 pcmk_quorum_handle = 0;
250 return -1;
251 }
252 return 0;
253 }
254
255
256
257
258
259
260
261
262
263
264
265 static void
266 quorum_notification_cb(quorum_handle_t handle, uint32_t quorate,
267 uint64_t ring_id, uint32_t view_list_entries,
268 uint32_t *view_list)
269 {
270 int i;
271 GHashTableIter iter;
272 crm_node_t *node = NULL;
273 static gboolean init_phase = TRUE;
274
275 if (quorate != crm_have_quorum) {
276 if (quorate) {
277 crm_notice("Quorum acquired " CRM_XS " membership=%" PRIu64 " members=%lu",
278 ring_id, (long unsigned int)view_list_entries);
279 } else {
280 crm_warn("Quorum lost " CRM_XS " membership=%" PRIu64 " members=%lu",
281 ring_id, (long unsigned int)view_list_entries);
282 }
283 crm_have_quorum = quorate;
284
285 } else {
286 crm_info("Quorum %s " CRM_XS " membership=%" PRIu64 " members=%lu",
287 (quorate? "retained" : "still lost"), ring_id,
288 (long unsigned int)view_list_entries);
289 }
290
291 if (view_list_entries == 0 && init_phase) {
292 crm_info("Corosync membership is still forming, ignoring");
293 return;
294 }
295
296 init_phase = FALSE;
297
298
299
300 g_hash_table_iter_init(&iter, crm_peer_cache);
301 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
302 node->last_seen = 0;
303 }
304
305
306 for (i = 0; i < view_list_entries; i++) {
307 uint32_t id = view_list[i];
308
309 crm_debug("Member[%d] %u ", i, id);
310
311
312 node = crm_get_peer(id, NULL);
313 if (node->uname == NULL) {
314 char *name = pcmk__corosync_name(0, id);
315
316 crm_info("Obtaining name for new node %u", id);
317 node = crm_get_peer(id, name);
318 free(name);
319 }
320
321
322 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
323 }
324
325
326 pcmk__reap_unseen_nodes(ring_id);
327
328 if (quorum_app_callback) {
329 quorum_app_callback(ring_id, quorate);
330 }
331 }
332
333
334
335
336
337
338
339
340 void
341 pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
342 gboolean),
343 void (*destroy)(gpointer))
344 {
345 cs_error_t rc;
346 int fd = 0;
347 int quorate = 0;
348 uint32_t quorum_type = 0;
349 struct mainloop_fd_callbacks quorum_fd_callbacks;
350 uid_t found_uid = 0;
351 gid_t found_gid = 0;
352 pid_t found_pid = 0;
353 int rv;
354
355 quorum_fd_callbacks.dispatch = quorum_dispatch_cb;
356 quorum_fd_callbacks.destroy = destroy;
357
358 crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
359
360 {
361 #if 0
362
363 quorum_model_v0_data_t quorum_model_data = {
364 .model = QUORUM_MODEL_V0,
365 .quorum_notify_fn = quorum_notification_cb,
366 };
367
368 rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0,
369 (quorum_model_data_t *) &quorum_model_data,
370 &quorum_type, NULL);
371 #else
372 quorum_callbacks_t quorum_callbacks = {
373 .quorum_notify_fn = quorum_notification_cb,
374 };
375
376 rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks,
377 &quorum_type);
378 #endif
379 }
380
381 if (rc != CS_OK) {
382 crm_err("Could not connect to the Quorum API: %s (%d)",
383 cs_strerror(rc), rc);
384 goto bail;
385
386 } else if (quorum_type != QUORUM_SET) {
387 crm_err("Corosync quorum is not configured");
388 goto bail;
389 }
390
391 rc = quorum_fd_get(pcmk_quorum_handle, &fd);
392 if (rc != CS_OK) {
393 crm_err("Could not obtain the Quorum API connection: %s (%d)",
394 strerror(rc), rc);
395 goto bail;
396 }
397
398
399 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
400 &found_uid, &found_gid))) {
401 crm_err("Quorum provider is not authentic:"
402 " process %lld (uid: %lld, gid: %lld)",
403 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
404 (long long) found_uid, (long long) found_gid);
405 rc = CS_ERR_ACCESS;
406 goto bail;
407 } else if (rv < 0) {
408 crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
409 strerror(-rv), -rv);
410 rc = CS_ERR_ACCESS;
411 goto bail;
412 }
413
414 rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
415 if (rc != CS_OK) {
416 crm_err("Could not obtain the current Quorum API state: %d", rc);
417 goto bail;
418 }
419
420 if (quorate) {
421 crm_notice("Quorum acquired");
422 } else {
423 crm_warn("No quorum");
424 }
425 quorum_app_callback = dispatch;
426 crm_have_quorum = quorate;
427
428 rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
429 if (rc != CS_OK) {
430 crm_err("Could not setup Quorum API notifications: %d", rc);
431 goto bail;
432 }
433
434 mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
435
436 pcmk__corosync_add_nodes(NULL);
437
438 bail:
439 if (rc != CS_OK) {
440 quorum_finalize(pcmk_quorum_handle);
441 }
442 }
443
444
445
446
447
448
449
450 gboolean
451 pcmk__corosync_connect(crm_cluster_t *cluster)
452 {
453 crm_node_t *peer = NULL;
454 enum cluster_type_e stack = get_cluster_type();
455
456 crm_peer_init();
457
458 if (stack != pcmk_cluster_corosync) {
459 crm_err("Invalid cluster type: %s " CRM_XS " stack=%d",
460 name_for_cluster_type(stack), stack);
461 return FALSE;
462 }
463
464 if (!cluster_connect_cpg(cluster)) {
465
466 return FALSE;
467 }
468 crm_info("Connection to %s established", name_for_cluster_type(stack));
469
470 cluster->nodeid = get_local_nodeid(0);
471 if (cluster->nodeid == 0) {
472 crm_err("Could not determine local node ID");
473 return FALSE;
474 }
475
476 cluster->uname = get_node_name(0);
477 if (cluster->uname == NULL) {
478 crm_err("Could not determine local node name");
479 return FALSE;
480 }
481
482
483 peer = crm_get_peer(cluster->nodeid, cluster->uname);
484 cluster->uuid = pcmk__corosync_uuid(peer);
485
486 return TRUE;
487 }
488
489
490
491
492
493
494
/*!
 * \internal
 * \brief Detect whether Corosync is the active cluster layer
 *
 * Detection is done by attempting to open a CMAP connection.
 *
 * \return pcmk_cluster_corosync if the connection succeeds or is refused
 *         with CS_ERR_SECURITY, otherwise pcmk_cluster_unknown
 */
enum cluster_type_e
pcmk__corosync_detect(void)
{
    cmap_handle_t handle;
    int rc = pcmk__init_cmap(&handle);

    if (rc == CS_ERR_SECURITY) {
        /* Treated as a Corosync cluster even though we were denied access
         * (presumably because something answered the connection attempt)
         */
        crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc);
        return pcmk_cluster_corosync;
    }

    if (rc != CS_OK) {
        crm_info("Failed to initialize the cmap API: %s (%d)",
                 pcmk__cs_err_str(rc), rc);
        return pcmk_cluster_unknown;
    }

    /* The connection was only a probe */
    cmap_finalize(handle);
    return pcmk_cluster_corosync;
}
522
523
524
525
526
527
528
529
530 gboolean
531 crm_is_corosync_peer_active(const crm_node_t *node)
532 {
533 if (node == NULL) {
534 crm_trace("Corosync peer inactive: NULL");
535 return FALSE;
536
537 } else if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
538 crm_trace("Corosync peer %s inactive: state=%s",
539 node->uname, node->state);
540 return FALSE;
541
542 } else if (!pcmk_is_set(node->processes, crm_proc_cpg)) {
543 crm_trace("Corosync peer %s inactive: processes=%.16x",
544 node->uname, node->processes);
545 return FALSE;
546 }
547 return TRUE;
548 }
549
550
551
552
553
554
555
556
557
558 bool
559 pcmk__corosync_add_nodes(xmlNode *xml_parent)
560 {
561 int lpc = 0;
562 cs_error_t rc = CS_OK;
563 int retries = 0;
564 bool any = false;
565 cmap_handle_t cmap_handle;
566 int fd = -1;
567 uid_t found_uid = 0;
568 gid_t found_gid = 0;
569 pid_t found_pid = 0;
570 int rv;
571
572 do {
573 rc = pcmk__init_cmap(&cmap_handle);
574 if (rc != CS_OK) {
575 retries++;
576 crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
577 retries);
578 sleep(retries);
579 }
580
581 } while (retries < 5 && rc != CS_OK);
582
583 if (rc != CS_OK) {
584 crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
585 return false;
586 }
587
588 rc = cmap_fd_get(cmap_handle, &fd);
589 if (rc != CS_OK) {
590 crm_err("Could not obtain the CMAP API connection: %s (%d)",
591 cs_strerror(rc), rc);
592 goto bail;
593 }
594
595
596 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
597 &found_uid, &found_gid))) {
598 crm_err("CMAP provider is not authentic:"
599 " process %lld (uid: %lld, gid: %lld)",
600 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
601 (long long) found_uid, (long long) found_gid);
602 goto bail;
603 } else if (rv < 0) {
604 crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
605 strerror(-rv), -rv);
606 goto bail;
607 }
608
609 crm_peer_init();
610 crm_trace("Initializing Corosync node list");
611 for (lpc = 0; TRUE; lpc++) {
612 uint32_t nodeid = 0;
613 char *name = NULL;
614 char *key = NULL;
615
616 key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
617 rc = cmap_get_uint32(cmap_handle, key, &nodeid);
618 free(key);
619
620 if (rc != CS_OK) {
621 break;
622 }
623
624 name = pcmk__corosync_name(cmap_handle, nodeid);
625 if (name != NULL) {
626 GHashTableIter iter;
627 crm_node_t *node = NULL;
628
629 g_hash_table_iter_init(&iter, crm_peer_cache);
630 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
631 if(node && node->uname && strcasecmp(node->uname, name) == 0) {
632 if (node->id && node->id != nodeid) {
633 crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
634 nodeid, name);
635 crm_exit(CRM_EX_FATAL);
636 }
637 }
638 }
639 }
640
641 if (nodeid > 0 || name != NULL) {
642 crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
643 crm_get_peer(nodeid, name);
644 }
645
646 if (nodeid > 0 && name != NULL) {
647 any = true;
648
649 if (xml_parent) {
650 xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);
651
652 crm_xml_set_id(node, "%u", nodeid);
653 crm_xml_add(node, XML_ATTR_UNAME, name);
654 }
655 }
656
657 free(name);
658 }
659 bail:
660 cmap_finalize(cmap_handle);
661 return any;
662 }
663
664
665
666
667
668
669
670 char *
671 pcmk__corosync_cluster_name(void)
672 {
673 cmap_handle_t handle;
674 char *cluster_name = NULL;
675 cs_error_t rc = CS_OK;
676 int fd = -1;
677 uid_t found_uid = 0;
678 gid_t found_gid = 0;
679 pid_t found_pid = 0;
680 int rv;
681
682 rc = pcmk__init_cmap(&handle);
683 if (rc != CS_OK) {
684 crm_info("Failed to initialize the cmap API: %s (%d)",
685 cs_strerror(rc), rc);
686 return NULL;
687 }
688
689 rc = cmap_fd_get(handle, &fd);
690 if (rc != CS_OK) {
691 crm_err("Could not obtain the CMAP API connection: %s (%d)",
692 cs_strerror(rc), rc);
693 goto bail;
694 }
695
696
697 if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
698 &found_uid, &found_gid))) {
699 crm_err("CMAP provider is not authentic:"
700 " process %lld (uid: %lld, gid: %lld)",
701 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
702 (long long) found_uid, (long long) found_gid);
703 goto bail;
704 } else if (rv < 0) {
705 crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
706 strerror(-rv), -rv);
707 goto bail;
708 }
709
710 rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
711 if (rc != CS_OK) {
712 crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
713
714 } else {
715 crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
716 }
717
718 bail:
719 cmap_finalize(handle);
720 return cluster_name;
721 }
722
723
724
725
726
727
728
729 bool
730 pcmk__corosync_has_nodelist(void)
731 {
732 cs_error_t cs_rc = CS_OK;
733 int retries = 0;
734 cmap_handle_t cmap_handle;
735 cmap_iter_handle_t iter_handle;
736 char key_name[CMAP_KEYNAME_MAXLEN + 1];
737 int fd = -1;
738 uid_t found_uid = 0;
739 gid_t found_gid = 0;
740 pid_t found_pid = 0;
741 int rc = pcmk_ok;
742
743 static bool got_result = false;
744 static bool result = false;
745
746 if (got_result) {
747 return result;
748 }
749
750
751 do {
752 cs_rc = pcmk__init_cmap(&cmap_handle);
753 if (cs_rc != CS_OK) {
754 retries++;
755 crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)",
756 cs_strerror(cs_rc), cs_rc, retries);
757 sleep(retries);
758 }
759 } while ((retries < 5) && (cs_rc != CS_OK));
760 if (cs_rc != CS_OK) {
761 crm_warn("Assuming Corosync does not have node list: "
762 "CMAP connection failed (%s) " CRM_XS " rc=%d",
763 cs_strerror(cs_rc), cs_rc);
764 return false;
765 }
766
767
768 cs_rc = cmap_fd_get(cmap_handle, &fd);
769 if (cs_rc != CS_OK) {
770 crm_warn("Assuming Corosync does not have node list: "
771 "CMAP unusable (%s) " CRM_XS " rc=%d",
772 cs_strerror(cs_rc), cs_rc);
773 goto bail;
774 }
775
776
777 rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0,
778 &found_pid, &found_uid, &found_gid);
779 if (rc == 0) {
780 crm_warn("Assuming Corosync does not have node list: "
781 "CMAP provider is inauthentic "
782 CRM_XS " pid=%lld uid=%lld gid=%lld",
783 (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
784 (long long) found_uid, (long long) found_gid);
785 goto bail;
786 } else if (rc < 0) {
787 crm_warn("Assuming Corosync does not have node list: "
788 "Could not verify CMAP authenticity (%s) " CRM_XS " rc=%d",
789 pcmk_strerror(rc), rc);
790 goto bail;
791 }
792
793
794 cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle);
795 if (cs_rc != CS_OK) {
796 crm_warn("Assuming Corosync does not have node list: "
797 "CMAP not readable (%s) " CRM_XS " rc=%d",
798 cs_strerror(cs_rc), cs_rc);
799 goto bail;
800 }
801
802 cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL);
803 if (cs_rc == CS_OK) {
804 result = true;
805 }
806
807 cmap_iter_finalize(cmap_handle, iter_handle);
808 got_result = true;
809 crm_debug("Corosync %s node list", (result? "has" : "does not have"));
810
811 bail:
812 cmap_finalize(cmap_handle);
813 return result;
814 }