This source file includes the following definitions.
- enable_crmd_as_root
- enable_mgmtd
- get_process_list
- pcmk_process_exit
- pcmk_child_exit
- stop_child
- start_child
- escalate_shutdown
- pcmk_shutdown_worker
- pcmk_ignore
- pcmk_sigquit
- pcmk_shutdown
- pcmk_ipc_accept
- pcmk_ipc_created
- pcmk_ipc_dispatch
- pcmk_ipc_closed
- pcmk_ipc_destroy
- update_process_clients
- update_process_peers
- update_node_processes
- mcp_chown
- check_active_before_startup_processes
- find_and_track_existing_processes
- init_children_processes
- mcp_cpg_destroy
- mcp_cpg_deliver
- mcp_cpg_membership
- mcp_quorum_callback
- mcp_quorum_destroy
- mcp_cman_dispatch
- mcp_cman_destroy
- main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 #include <crm_internal.h>
20 #include <pacemaker.h>
21
22 #include <pwd.h>
23 #include <grp.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <sys/time.h>
27 #include <sys/resource.h>
28 #include <sys/reboot.h>
29
30 #include <crm/msg_xml.h>
31 #include <crm/common/ipcs.h>
32 #include <crm/common/mainloop.h>
33 #include <crm/cluster/internal.h>
34 #include <crm/cluster.h>
35
36 #include <dirent.h>
37 #include <ctype.h>
38
39 gboolean pcmk_quorate = FALSE;
40 gboolean fatal_error = FALSE;
41 GMainLoop *mainloop = NULL;
42
43 #define PCMK_PROCESS_CHECK_INTERVAL 5
44
45 const char *local_name = NULL;
46 uint32_t local_nodeid = 0;
47 crm_trigger_t *shutdown_trigger = NULL;
48 const char *pid_file = "/var/run/pacemaker.pid";
49
50 typedef struct pcmk_child_s {
51 int pid;
52 long flag;
53 int start_seq;
54 int respawn_count;
55 gboolean respawn;
56 const char *name;
57 const char *uid;
58 const char *command;
59
60 gboolean active_before_startup;
61 } pcmk_child_t;
62
63
64 #define pcmk_child_crmd 4
65 #define pcmk_child_mgmtd 8
66
67 static pcmk_child_t pcmk_children[] = {
68 { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL },
69 { 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL },
70 { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" },
71 { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
72 { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
73 { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
74 { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL },
75 { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
76 { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" },
77 { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" },
78 };
79
80
81 static gboolean start_child(pcmk_child_t * child);
82 static gboolean check_active_before_startup_processes(gpointer user_data);
83 void update_process_clients(crm_client_t *client);
84 void update_process_peers(void);
85
86 void
87 enable_crmd_as_root(gboolean enable)
88 {
89 if (enable) {
90 pcmk_children[pcmk_child_crmd].uid = NULL;
91 } else {
92 pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER;
93 }
94 }
95
96 void
97 enable_mgmtd(gboolean enable)
98 {
99 if (enable) {
100 pcmk_children[pcmk_child_mgmtd].start_seq = 7;
101 } else {
102 pcmk_children[pcmk_child_mgmtd].start_seq = 0;
103 }
104 }
105
106 static uint32_t
107 get_process_list(void)
108 {
109 int lpc = 0;
110 uint32_t procs = crm_get_cluster_proc();
111
112 for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) {
113 if (pcmk_children[lpc].pid != 0) {
114 procs |= pcmk_children[lpc].flag;
115 }
116 }
117 return procs;
118 }
119
120 static void
121 pcmk_process_exit(pcmk_child_t * child)
122 {
123 child->pid = 0;
124 child->active_before_startup = FALSE;
125
126
127
128
129
130
131
132
133
134 update_node_processes(local_nodeid, NULL, get_process_list());
135
136 child->respawn_count += 1;
137 if (child->respawn_count > MAX_RESPAWN) {
138 crm_err("Child respawn count exceeded by %s", child->name);
139 child->respawn = FALSE;
140 }
141
142 if (shutdown_trigger) {
143 mainloop_set_trigger(shutdown_trigger);
144 update_node_processes(local_nodeid, NULL, get_process_list());
145
146 } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) {
147 crm_err("Rebooting system because of %s", child->name);
148 pcmk_panic(__FUNCTION__);
149
150 } else if (child->respawn) {
151 crm_notice("Respawning failed child process: %s", child->name);
152 start_child(child);
153 }
154 }
155
156 static void
157 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
158 {
159 pcmk_child_t *child = mainloop_child_userdata(p);
160 const char *name = mainloop_child_name(p);
161
162 if (signo && signo == SIGKILL) {
163 crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
164
165 } else if (signo) {
166 crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core);
167
168 } else {
169 switch(exitcode) {
170 case pcmk_ok:
171 crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
172 break;
173
174 case DAEMON_RESPAWN_STOP:
175 crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid);
176 child->respawn = FALSE;
177 fatal_error = TRUE;
178 pcmk_shutdown(SIGTERM);
179 break;
180
181 case pcmk_err_panic:
182 do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid);
183 child->respawn = FALSE;
184 fatal_error = TRUE;
185 pcmk_panic(__FUNCTION__);
186 pcmk_shutdown(SIGTERM);
187 break;
188
189 default:
190 crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode);
191 break;
192 }
193 }
194
195 pcmk_process_exit(child);
196 }
197
198 static gboolean
199 stop_child(pcmk_child_t * child, int signal)
200 {
201 if (signal == 0) {
202 signal = SIGTERM;
203 }
204
205 if (child->command == NULL) {
206 crm_debug("Nothing to do for child \"%s\"", child->name);
207 return TRUE;
208 }
209
210 if (child->pid <= 0) {
211 crm_trace("Client %s not running", child->name);
212 return TRUE;
213 }
214
215 errno = 0;
216 if (kill(child->pid, signal) == 0) {
217 crm_notice("Stopping %s "CRM_XS" sent signal %d to process %d",
218 child->name, signal, child->pid);
219
220 } else {
221 crm_perror(LOG_ERR, "Could not stop %s (process %d) with signal %d",
222 child->name, child->pid, signal);
223 }
224
225 return TRUE;
226 }
227
228 static char *opts_default[] = { NULL, NULL };
229 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
230
231 static gboolean
232 start_child(pcmk_child_t * child)
233 {
234 int lpc = 0;
235 uid_t uid = 0;
236 gid_t gid = 0;
237 struct rlimit oflimits;
238 gboolean use_valgrind = FALSE;
239 gboolean use_callgrind = FALSE;
240 const char *devnull = "/dev/null";
241 const char *env_valgrind = getenv("PCMK_valgrind_enabled");
242 const char *env_callgrind = getenv("PCMK_callgrind_enabled");
243 enum cluster_type_e stack = get_cluster_type();
244
245 child->active_before_startup = FALSE;
246
247 if (child->command == NULL) {
248 crm_info("Nothing to do for child \"%s\"", child->name);
249 return TRUE;
250 }
251
252 if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
253 use_callgrind = TRUE;
254 use_valgrind = TRUE;
255
256 } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
257 use_callgrind = TRUE;
258 use_valgrind = TRUE;
259
260 } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
261 use_valgrind = TRUE;
262
263 } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
264 use_valgrind = TRUE;
265 }
266
267 if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
268 crm_warn("Cannot enable valgrind for %s:"
269 " The location of the valgrind binary is unknown", child->name);
270 use_valgrind = FALSE;
271 }
272
273 if (child->uid) {
274 if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
275 crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
276 return FALSE;
277 }
278 crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
279 }
280
281 child->pid = fork();
282 CRM_ASSERT(child->pid != -1);
283
284 if (child->pid > 0) {
285
286 mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
287
288 crm_info("Forked child %d for process %s%s", child->pid, child->name,
289 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
290 update_node_processes(local_nodeid, NULL, get_process_list());
291 return TRUE;
292
293 } else {
294
295 (void)setsid();
296
297
298 opts_vgrind[0] = strdup(VALGRIND_BIN);
299 if (use_callgrind) {
300 opts_vgrind[1] = strdup("--tool=callgrind");
301 opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
302 opts_vgrind[3] = strdup(child->command);
303 opts_vgrind[4] = NULL;
304 } else {
305 opts_vgrind[1] = strdup(child->command);
306 opts_vgrind[2] = NULL;
307 opts_vgrind[3] = NULL;
308 opts_vgrind[4] = NULL;
309 }
310 opts_default[0] = strdup(child->command);
311
312 if(gid) {
313 if(stack == pcmk_cluster_corosync) {
314
315
316
317
318
319
320 if (setgid(gid) < 0) {
321 crm_perror(LOG_ERR, "Could not set group to %d", gid);
322 }
323
324
325 } else if (initgroups(child->uid, gid) < 0) {
326 crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
327 }
328 }
329
330 if (uid && setuid(uid) < 0) {
331 crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
332 }
333
334
335 getrlimit(RLIMIT_NOFILE, &oflimits);
336 for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
337 close(lpc);
338 }
339
340 (void)open(devnull, O_RDONLY);
341 (void)open(devnull, O_WRONLY);
342 (void)open(devnull, O_WRONLY);
343
344 if (use_valgrind) {
345 (void)execvp(VALGRIND_BIN, opts_vgrind);
346 } else {
347 (void)execvp(child->command, opts_default);
348 }
349 crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
350 crm_exit(DAEMON_RESPAWN_STOP);
351 }
352 return TRUE;
353 }
354
355 static gboolean
356 escalate_shutdown(gpointer data)
357 {
358
359 pcmk_child_t *child = data;
360
361 if (child->pid) {
362
363 crm_err("Child %s not terminating in a timely manner, forcing", child->name);
364 stop_child(child, SIGSEGV);
365 }
366 return FALSE;
367 }
368
369 static gboolean
370 pcmk_shutdown_worker(gpointer user_data)
371 {
372 static int phase = 0;
373 static time_t next_log = 0;
374 static int max = SIZEOF(pcmk_children);
375
376 int lpc = 0;
377
378 if (phase == 0) {
379 crm_notice("Shutting down Pacemaker");
380 phase = max;
381
382
383 g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
384 }
385
386 for (; phase > 0; phase--) {
387
388
389 for (lpc = max - 1; lpc >= 0; lpc--) {
390 pcmk_child_t *child = &(pcmk_children[lpc]);
391
392 if (phase != child->start_seq) {
393 continue;
394 }
395
396 if (child->pid) {
397 time_t now = time(NULL);
398
399 if (child->respawn) {
400 next_log = now + 30;
401 child->respawn = FALSE;
402 stop_child(child, SIGTERM);
403 if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
404 g_timeout_add(180000 , escalate_shutdown, child);
405 }
406
407 } else if (now >= next_log) {
408 next_log = now + 30;
409 crm_notice("Still waiting for %s to terminate "
410 CRM_XS " pid=%d seq=%d",
411 child->name, child->pid, child->start_seq);
412 }
413 return TRUE;
414 }
415
416
417 crm_debug("%s confirmed stopped", child->name);
418 child->pid = 0;
419 }
420 }
421
422
423 crm_notice("Shutdown complete");
424
425 {
426 const char *delay = daemon_option("shutdown_delay");
427 if(delay) {
428 sync();
429 sleep(crm_get_msec(delay) / 1000);
430 }
431 }
432
433 g_main_loop_quit(mainloop);
434
435 if (fatal_error) {
436 crm_notice("Attempting to inhibit respawning after fatal error");
437 crm_exit(DAEMON_RESPAWN_STOP);
438 }
439
440 return TRUE;
441 }
442
/* Signal handler that only logs the signal it received. */
static void
pcmk_ignore(int nsig)
{
    const char *signame = strsignal(nsig);

    crm_info("Ignoring signal %s (%d)", signame, nsig);
}
448
/* Signal handler (registered for SIGQUIT in main()) that calls pcmk_panic(). */
static void
pcmk_sigquit(int nsig)
{
    (void) nsig;                /* the signal number is not used */
    pcmk_panic(__FUNCTION__);
}
454
455 void
456 pcmk_shutdown(int nsig)
457 {
458 if (shutdown_trigger == NULL) {
459 shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
460 }
461 mainloop_set_trigger(shutdown_trigger);
462 }
463
464 static int32_t
465 pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
466 {
467 crm_trace("Connection %p", c);
468 if (crm_client_new(c, uid, gid) == NULL) {
469 return -EIO;
470 }
471 return 0;
472 }
473
474 static void
475 pcmk_ipc_created(qb_ipcs_connection_t * c)
476 {
477 crm_trace("Connection %p", c);
478 }
479
480
481 static int32_t
482 pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
483 {
484 uint32_t id = 0;
485 uint32_t flags = 0;
486 const char *task = NULL;
487 crm_client_t *c = crm_client_get(qbc);
488 xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);
489
490 crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
491 if (msg == NULL) {
492 return 0;
493 }
494
495 task = crm_element_value(msg, F_CRM_TASK);
496 if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
497
498 crm_notice("Shutting down in response to ticket %s (%s)",
499 crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
500 pcmk_shutdown(15);
501
502 } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
503
504 struct iovec *iov;
505 int id = 0;
506 const char *name = NULL;
507
508 crm_element_value_int(msg, XML_ATTR_ID, &id);
509 name = crm_element_value(msg, XML_ATTR_UNAME);
510 crm_notice("Instructing peers to remove references to node %s/%u", name, id);
511
512 iov = calloc(1, sizeof(struct iovec));
513 iov->iov_base = dump_xml_unformatted(msg);
514 iov->iov_len = 1 + strlen(iov->iov_base);
515 send_cpg_iov(iov);
516
517 } else {
518 update_process_clients(c);
519 }
520
521 free_xml(msg);
522 return 0;
523 }
524
525
526 static int32_t
527 pcmk_ipc_closed(qb_ipcs_connection_t * c)
528 {
529 crm_client_t *client = crm_client_get(c);
530
531 if (client == NULL) {
532 return 0;
533 }
534 crm_trace("Connection %p", c);
535 crm_client_destroy(client);
536 return 0;
537 }
538
539 static void
540 pcmk_ipc_destroy(qb_ipcs_connection_t * c)
541 {
542 crm_trace("Connection %p", c);
543 pcmk_ipc_closed(c);
544 }
545
546 struct qb_ipcs_service_handlers mcp_ipc_callbacks = {
547 .connection_accept = pcmk_ipc_accept,
548 .connection_created = pcmk_ipc_created,
549 .msg_process = pcmk_ipc_dispatch,
550 .connection_closed = pcmk_ipc_closed,
551 .connection_destroyed = pcmk_ipc_destroy
552 };
553
554
555
556
557
558
559
560 void
561 update_process_clients(crm_client_t *client)
562 {
563 GHashTableIter iter;
564 crm_node_t *node = NULL;
565 xmlNode *update = create_xml_node(NULL, "nodes");
566
567 if (is_corosync_cluster()) {
568 crm_xml_add_int(update, "quorate", pcmk_quorate);
569 }
570
571 g_hash_table_iter_init(&iter, crm_peer_cache);
572 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
573 xmlNode *xml = create_xml_node(update, "node");
574
575 crm_xml_add_int(xml, "id", node->id);
576 crm_xml_add(xml, "uname", node->uname);
577 crm_xml_add(xml, "state", node->state);
578 crm_xml_add_int(xml, "processes", node->processes);
579 }
580
581 if(client) {
582 crm_trace("Sending process list to client %s", client->id);
583 crm_ipcs_send(client, 0, update, crm_ipc_server_event);
584
585 } else {
586 crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections));
587 g_hash_table_iter_init(&iter, client_connections);
588 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) {
589 crm_ipcs_send(client, 0, update, crm_ipc_server_event);
590 }
591 }
592
593 free_xml(update);
594 }
595
596
597
598
599
600 void
601 update_process_peers(void)
602 {
603
604
605 char buffer[1024];
606 struct iovec *iov;
607 int rc = 0;
608
609 if (local_name) {
610 rc = snprintf(buffer, SIZEOF(buffer), "<node uname=\"%s\" proclist=\"%u\"/>",
611 local_name, get_process_list());
612 } else {
613 rc = snprintf(buffer, SIZEOF(buffer), "<node proclist=\"%u\"/>", get_process_list());
614 }
615
616 crm_trace("Sending %s", buffer);
617 iov = calloc(1, sizeof(struct iovec));
618 iov->iov_base = strdup(buffer);
619 iov->iov_len = rc + 1;
620 send_cpg_iov(iov);
621 }
622
623
624
625
626
627
628
629
630
631
632
633 gboolean
634 update_node_processes(uint32_t id, const char *uname, uint32_t procs)
635 {
636 gboolean changed = FALSE;
637 crm_node_t *node = crm_get_peer(id, uname);
638
639 if (procs != 0) {
640 if (procs != node->processes) {
641 crm_debug("Node %s now has process list: %.32x (was %.32x)",
642 node->uname, procs, node->processes);
643 node->processes = procs;
644 changed = TRUE;
645
646
647 if (id == local_nodeid) {
648 update_process_clients(NULL);
649 update_process_peers();
650 }
651
652 } else {
653 crm_trace("Node %s still has process list: %.32x", node->uname, procs);
654 }
655 }
656 return changed;
657 }
658
659
660
661 static struct crm_option long_options[] = {
662
663 {"help", 0, 0, '?', "\tThis text"},
664 {"version", 0, 0, '$', "\tVersion information" },
665 {"verbose", 0, 0, 'V', "\tIncrease debug output"},
666 {"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"},
667 {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"},
668
669 {"-spacer-", 1, 0, '-', "\nAdditional Options:"},
670 {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"},
671 {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"},
672 {"standby", 0, 0, 's', "\tStart node in standby state"},
673
674 {NULL, 0, 0, 0}
675 };
676
677
678 static void
679 mcp_chown(const char *path, uid_t uid, gid_t gid)
680 {
681 int rc = chown(path, uid, gid);
682
683 if (rc < 0) {
684 crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
685 path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
686 }
687 }
688
689 static gboolean
690 check_active_before_startup_processes(gpointer user_data)
691 {
692 int start_seq = 1, lpc = 0;
693 static int max = SIZEOF(pcmk_children);
694 gboolean keep_tracking = FALSE;
695
696 for (start_seq = 1; start_seq < max; start_seq++) {
697 for (lpc = 0; lpc < max; lpc++) {
698 if (pcmk_children[lpc].active_before_startup == FALSE) {
699
700 continue;
701 } else if (start_seq != pcmk_children[lpc].start_seq) {
702 continue;
703 } else {
704 const char *name = pcmk_children[lpc].name;
705 if (pcmk_children[lpc].flag == crm_proc_stonith_ng) {
706 name = "stonithd";
707 }
708
709 if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) {
710 crm_notice("Process %s terminated (pid=%d)",
711 name, pcmk_children[lpc].pid);
712 pcmk_process_exit(&(pcmk_children[lpc]));
713 continue;
714 }
715 }
716
717
718 keep_tracking = TRUE;
719 }
720 }
721
722 return keep_tracking;
723 }
724
725 static bool
726 find_and_track_existing_processes(void)
727 {
728 DIR *dp;
729 struct dirent *entry;
730 int start_tracker = 0;
731 char entry_name[64];
732
733 dp = opendir("/proc");
734 if (!dp) {
735
736 crm_notice("Can not read /proc directory to track existing components");
737 return FALSE;
738 }
739
740 while ((entry = readdir(dp)) != NULL) {
741 int pid;
742 int max = SIZEOF(pcmk_children);
743 int i;
744
745 if (crm_procfs_process_info(entry, entry_name, &pid) < 0) {
746 continue;
747 }
748 for (i = 0; i < max; i++) {
749 const char *name = pcmk_children[i].name;
750
751 if (pcmk_children[i].start_seq == 0) {
752 continue;
753 }
754 if (pcmk_children[i].flag == crm_proc_stonith_ng) {
755 name = "stonithd";
756 }
757 if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) {
758 crm_notice("Tracking existing %s process (pid=%d)", name, pid);
759 pcmk_children[i].pid = pid;
760 pcmk_children[i].active_before_startup = TRUE;
761 start_tracker = 1;
762 break;
763 }
764 }
765 }
766
767 if (start_tracker) {
768 g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
769 NULL);
770 }
771 closedir(dp);
772
773 return start_tracker;
774 }
775
776 static void
777 init_children_processes(void)
778 {
779 int start_seq = 1, lpc = 0;
780 static int max = SIZEOF(pcmk_children);
781
782
783 for (start_seq = 1; start_seq < max; start_seq++) {
784
785 for (lpc = 0; lpc < max; lpc++) {
786 if (pcmk_children[lpc].pid) {
787
788 continue;
789 }
790
791 if (start_seq == pcmk_children[lpc].start_seq) {
792 start_child(&(pcmk_children[lpc]));
793 }
794 }
795 }
796
797
798
799
800
801
802 setenv("PCMK_respawned", "true", 1);
803 }
804
805 static void
806 mcp_cpg_destroy(gpointer user_data)
807 {
808 crm_err("Connection destroyed");
809 crm_exit(ENOTCONN);
810 }
811
812
813
814
815
816
817
818
819
820
821
822
823 static void
824 mcp_cpg_deliver(cpg_handle_t handle,
825 const struct cpg_name *groupName,
826 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
827 {
828 xmlNode *xml = string2xml(msg);
829 const char *task = crm_element_value(xml, F_CRM_TASK);
830
831 crm_trace("Received CPG message (%s): %.200s",
832 (task? task : "process list"), (char*)msg);
833
834 if (task == NULL) {
835 if (nodeid == local_nodeid) {
836 crm_info("Ignoring process list sent by peer for local node");
837 } else {
838 uint32_t procs = 0;
839 const char *uname = crm_element_value(xml, "uname");
840
841 crm_element_value_int(xml, "proclist", (int *)&procs);
842 if (update_node_processes(nodeid, uname, procs)) {
843 update_process_clients(NULL);
844 }
845 }
846
847 } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
848 int id = 0;
849 const char *name = NULL;
850
851 crm_element_value_int(xml, XML_ATTR_ID, &id);
852 name = crm_element_value(xml, XML_ATTR_UNAME);
853 reap_crm_member(id, name);
854 }
855
856 if (xml != NULL) {
857 free_xml(xml);
858 }
859 }
860
861 static void
862 mcp_cpg_membership(cpg_handle_t handle,
863 const struct cpg_name *groupName,
864 const struct cpg_address *member_list, size_t member_list_entries,
865 const struct cpg_address *left_list, size_t left_list_entries,
866 const struct cpg_address *joined_list, size_t joined_list_entries)
867 {
868
869 pcmk_cpg_membership(handle, groupName, member_list, member_list_entries,
870 left_list, left_list_entries,
871 joined_list, joined_list_entries);
872
873
874 update_process_peers();
875 }
876
877 static gboolean
878 mcp_quorum_callback(unsigned long long seq, gboolean quorate)
879 {
880 pcmk_quorate = quorate;
881 return TRUE;
882 }
883
884 static void
885 mcp_quorum_destroy(gpointer user_data)
886 {
887 crm_info("connection lost");
888 }
889
#if SUPPORT_CMAN
/* cman callback: remember the reported quorum state.  Always returns TRUE. */
static gboolean
mcp_cman_dispatch(unsigned long long seq, gboolean quorate)
{
    (void) seq;                 /* unused */
    pcmk_quorate = quorate;
    return TRUE;
}

/* Callback for a closed cman connection; it is only logged. */
static void
mcp_cman_destroy(gpointer user_data)
{
    (void) user_data;           /* unused */
    crm_info("connection closed");
}
#endif
904
905 int
906 main(int argc, char **argv)
907 {
908 int rc;
909 int flag;
910 int argerr = 0;
911
912 int option_index = 0;
913 gboolean shutdown = FALSE;
914
915 uid_t pcmk_uid = 0;
916 gid_t pcmk_gid = 0;
917 struct rlimit cores;
918 crm_ipc_t *old_instance = NULL;
919 qb_ipcs_service_t *ipcs = NULL;
920 const char *facility = daemon_option("logfacility");
921 static crm_cluster_t cluster;
922
923 crm_log_preinit(NULL, argc, argv);
924 crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n");
925 mainloop_add_signal(SIGHUP, pcmk_ignore);
926 mainloop_add_signal(SIGQUIT, pcmk_sigquit);
927
928 while (1) {
929 flag = crm_get_option(argc, argv, &option_index);
930 if (flag == -1)
931 break;
932
933 switch (flag) {
934 case 'V':
935 crm_bump_log_level(argc, argv);
936 break;
937 case 'f':
938
939 break;
940 case 'p':
941 pid_file = optarg;
942 break;
943 case 's':
944 set_daemon_option("node_start_state", "standby");
945 break;
946 case '$':
947 case '?':
948 crm_help(flag, EX_OK);
949 break;
950 case 'S':
951 shutdown = TRUE;
952 break;
953 case 'F':
954 printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
955 CRM_FEATURE_SET, CRM_FEATURES);
956 crm_exit(pcmk_ok);
957 default:
958 printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
959 ++argerr;
960 break;
961 }
962 }
963
964 if (optind < argc) {
965 printf("non-option ARGV-elements: ");
966 while (optind < argc)
967 printf("%s ", argv[optind++]);
968 printf("\n");
969 }
970 if (argerr) {
971 crm_help('?', EX_USAGE);
972 }
973
974
975 setenv("LC_ALL", "C", 1);
976 setenv("HA_LOGD", "no", 1);
977
978 set_daemon_option("mcp", "true");
979 set_daemon_option("use_logd", "off");
980
981 crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
982
983
984 set_daemon_option("logfacility", facility);
985
986 crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP);
987 old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
988 crm_ipc_connect(old_instance);
989
990 if (shutdown) {
991 crm_debug("Terminating previous instance");
992 while (crm_ipc_connected(old_instance)) {
993 xmlNode *cmd =
994 create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL);
995
996 crm_debug(".");
997 crm_ipc_send(old_instance, cmd, 0, 0, NULL);
998 free_xml(cmd);
999
1000 sleep(2);
1001 }
1002 crm_ipc_close(old_instance);
1003 crm_ipc_destroy(old_instance);
1004 crm_exit(pcmk_ok);
1005
1006 } else if (crm_ipc_connected(old_instance)) {
1007 crm_ipc_close(old_instance);
1008 crm_ipc_destroy(old_instance);
1009 crm_err("Pacemaker is already active, aborting startup");
1010 crm_exit(DAEMON_RESPAWN_STOP);
1011 }
1012
1013 crm_ipc_close(old_instance);
1014 crm_ipc_destroy(old_instance);
1015
1016 if (mcp_read_config() == FALSE) {
1017 crm_notice("Could not obtain corosync config data, exiting");
1018 crm_exit(ENODATA);
1019 }
1020
1021 crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
1022 PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
1023 mainloop = g_main_new(FALSE);
1024 sysrq_init();
1025
1026 rc = getrlimit(RLIMIT_CORE, &cores);
1027 if (rc < 0) {
1028 crm_perror(LOG_ERR, "Cannot determine current maximum core size.");
1029 } else {
1030 if (cores.rlim_max == 0 && geteuid() == 0) {
1031 cores.rlim_max = RLIM_INFINITY;
1032 } else {
1033 crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max);
1034 }
1035 cores.rlim_cur = cores.rlim_max;
1036
1037 rc = setrlimit(RLIMIT_CORE, &cores);
1038 if (rc < 0) {
1039 crm_perror(LOG_ERR,
1040 "Core file generation will remain disabled."
1041 " Core files are an important diagnositic tool,"
1042 " please consider enabling them by default.");
1043 }
1044 #if 0
1045
1046
1047
1048 if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) {
1049 crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid");
1050 }
1051 #endif
1052 }
1053 rc = pcmk_ok;
1054
1055 if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) {
1056 crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER);
1057 crm_exit(ENOKEY);
1058 }
1059
1060 mkdir(CRM_STATE_DIR, 0750);
1061 mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
1062
1063
1064 crm_build_path(CRM_PACEMAKER_DIR, 0750);
1065 mcp_chown(CRM_PACEMAKER_DIR, pcmk_uid, pcmk_gid);
1066
1067
1068 crm_build_path(CRM_CORE_DIR, 0750);
1069 mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid);
1070
1071
1072 crm_build_path(CRM_BLACKBOX_DIR, 0750);
1073 mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid);
1074
1075
1076 crm_build_path(PE_STATE_DIR, 0750);
1077 mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid);
1078
1079
1080 crm_build_path(CRM_CONFIG_DIR, 0750);
1081 mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid);
1082
1083
1084
1085 ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks);
1086 if (ipcs == NULL) {
1087 crm_err("Couldn't start IPC server");
1088 crm_exit(EIO);
1089 }
1090
1091
1092 if (cluster_connect_cfg(&local_nodeid) == FALSE) {
1093 crm_err("Couldn't connect to Corosync's CFG service");
1094 crm_exit(ENOPROTOOPT);
1095 }
1096
1097 if(pcmk_locate_sbd() > 0) {
1098 setenv("PCMK_watchdog", "true", 1);
1099 } else {
1100 setenv("PCMK_watchdog", "false", 1);
1101 }
1102
1103 find_and_track_existing_processes();
1104
1105 cluster.destroy = mcp_cpg_destroy;
1106 cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
1107 cluster.cpg.cpg_confchg_fn = mcp_cpg_membership;
1108
1109 crm_set_autoreap(FALSE);
1110
1111 if(cluster_connect_cpg(&cluster) == FALSE) {
1112 crm_err("Couldn't connect to Corosync's CPG service");
1113 rc = -ENOPROTOOPT;
1114 }
1115
1116 if (rc == pcmk_ok && is_corosync_cluster()) {
1117
1118 if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) {
1119 rc = -ENOTCONN;
1120 }
1121 }
1122
1123 #if SUPPORT_CMAN
1124 if (rc == pcmk_ok && is_cman_cluster()) {
1125 init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy);
1126 }
1127 #endif
1128
1129 if(rc == pcmk_ok) {
1130 local_name = get_local_node_name();
1131 update_node_processes(local_nodeid, local_name, get_process_list());
1132
1133 mainloop_add_signal(SIGTERM, pcmk_shutdown);
1134 mainloop_add_signal(SIGINT, pcmk_shutdown);
1135
1136 init_children_processes();
1137
1138 crm_info("Starting mainloop");
1139
1140 g_main_run(mainloop);
1141 }
1142
1143 if (ipcs) {
1144 crm_trace("Closing IPC server");
1145 mainloop_del_ipc_server(ipcs);
1146 ipcs = NULL;
1147 }
1148
1149 g_main_destroy(mainloop);
1150
1151 cluster_disconnect_cpg(&cluster);
1152 cluster_disconnect_cfg();
1153
1154 crm_info("Exiting %s", crm_system_name);
1155
1156 return crm_exit(rc);
1157 }