This source file includes the following definitions:
- pcmkd_cluster_connected
- check_next_subdaemon
- escalate_shutdown
- pcmk_child_exit
- pcmk_process_exit
- pcmk_shutdown_worker
- start_child
- child_liveness
- find_and_track_existing_processes
- init_children_processes
- pcmk_shutdown
- restart_cluster_subdaemons
- stop_child
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include "pacemakerd.h"
12
13 #if SUPPORT_COROSYNC
14 #include "pcmkd_corosync.h"
15 #endif
16
17 #include <errno.h>
18 #include <grp.h>
19 #include <signal.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/types.h>
25 #include <time.h>
26 #include <unistd.h>
27
28 #include <crm/cluster.h>
29 #include <crm/common/xml.h>
30
/* Behavior flags for a subdaemon; combined bitwise in pcmk_child_t.flags */
enum child_daemon_flags {
    child_none                  = 0x0,

    // Subdaemon is respawned after an unexpected exit (see pcmk_process_exit())
    child_respawn               = 0x1,

    // Respawning is deferred while the cluster layer is not connected
    child_needs_cluster         = 0x2,

    // A deferred respawn is pending (see restart_cluster_subdaemons())
    child_needs_retry           = 0x4,

    // Process already existed when pacemakerd started tracking it
    child_active_before_startup = 0x8,
};
38
39 typedef struct pcmk_child_s {
40 pid_t pid;
41 int respawn_count;
42 const char *name;
43 const char *uid;
44 const char *command;
45 const char *endpoint;
46 int check_count;
47 uint32_t flags;
48 } pcmk_child_t;
49
50 #define PCMK_PROCESS_CHECK_INTERVAL 1
51 #define PCMK_PROCESS_CHECK_RETRIES 5
52 #define SHUTDOWN_ESCALATION_PERIOD 180000
53
54
55 #define PCMK_CHILD_CONTROLD 5
56
57 static pcmk_child_t pcmk_children[] = {
58 {
59 0, 0, "pacemaker-based", CRM_DAEMON_USER,
60 CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
61 0, child_respawn | child_needs_cluster
62 },
63 {
64 0, 0, "pacemaker-fenced", NULL,
65 CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
66 0, child_respawn | child_needs_cluster
67 },
68 {
69 0, 0, "pacemaker-execd", NULL,
70 CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
71 0, child_respawn
72 },
73 {
74 0, 0, "pacemaker-attrd", CRM_DAEMON_USER,
75 CRM_DAEMON_DIR "/pacemaker-attrd", PCMK__VALUE_ATTRD,
76 0, child_respawn | child_needs_cluster
77 },
78 {
79 0, 0, "pacemaker-schedulerd", CRM_DAEMON_USER,
80 CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
81 0, child_respawn
82 },
83 {
84 0, 0, "pacemaker-controld", CRM_DAEMON_USER,
85 CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
86 0, child_respawn | child_needs_cluster
87 },
88 };
89
90 static char *opts_default[] = { NULL, NULL };
91 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
92
93 crm_trigger_t *shutdown_trigger = NULL;
94 crm_trigger_t *startup_trigger = NULL;
95 time_t subdaemon_check_progress = 0;
96
97
98 static bool need_root_group = true;
99
100
101
102
103
104
105
106
107
108
109
110 unsigned int shutdown_complete_state_reported_to = 0;
111 gboolean shutdown_complete_state_reported_client_closed = FALSE;
112
113
114 const char *pacemakerd_state = PCMK__VALUE_INIT;
115 gboolean running_with_sbd = FALSE;
116
117 GMainLoop *mainloop = NULL;
118
119 static gboolean fatal_error = FALSE;
120
121 static int child_liveness(pcmk_child_t *child);
122 static gboolean escalate_shutdown(gpointer data);
123 static int start_child(pcmk_child_t * child);
124 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
125 static void pcmk_process_exit(pcmk_child_t * child);
126 static gboolean pcmk_shutdown_worker(gpointer user_data);
127 static gboolean stop_child(pcmk_child_t * child, int signal);
128
/*!
 * \internal
 * \brief Check whether pacemakerd is connected to the cluster layer
 *
 * \return Result of pcmkd_corosync_connected() when built with Corosync
 *         support, otherwise always true
 */
static bool
pcmkd_cluster_connected(void)
{
    bool connected = true;

#if SUPPORT_COROSYNC
    connected = pcmkd_corosync_connected();
#endif
    return connected;
}
138
139 static gboolean
140 check_next_subdaemon(gpointer user_data)
141 {
142 static int next_child = 0;
143
144 pcmk_child_t *child = &(pcmk_children[next_child]);
145 const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
146 int rc = child_liveness(child);
147
148 crm_trace("Checked %s[%lld]: %s (%d)",
149 child->name, pid, pcmk_rc_str(rc), rc);
150
151 switch (rc) {
152 case pcmk_rc_ok:
153 child->check_count = 0;
154 subdaemon_check_progress = time(NULL);
155 break;
156
157 case pcmk_rc_ipc_pid_only:
158 if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
159 crm_crit("%s[%lld] is unresponsive to IPC after %d attempt%s "
160 "and will now be killed",
161 child->name, pid, child->check_count,
162 pcmk__plural_s(child->check_count));
163 stop_child(child, SIGKILL);
164 if (pcmk_is_set(child->flags, child_respawn)) {
165
166 child->check_count = 0;
167 }
168 } else {
169 crm_notice("%s[%lld] is unresponsive to IPC after %d attempt%s",
170 child->name, pid, child->check_count,
171 pcmk__plural_s(child->check_count));
172 if (pcmk_is_set(child->flags, child_respawn)) {
173
174
175
176
177
178 subdaemon_check_progress = time(NULL);
179 }
180 }
181
182
183
184 break;
185 case pcmk_rc_ipc_unresponsive:
186 if (!pcmk_is_set(child->flags, child_respawn)) {
187
188
189
190
191
192
193 if (child->respawn_count <= MAX_RESPAWN) {
194 subdaemon_check_progress = time(NULL);
195 }
196 }
197 if (!pcmk_is_set(child->flags, child_active_before_startup)) {
198 crm_trace("%s[%lld] terminated (relying on SIGCHLD handler)",
199 child->name, pid);
200 break;
201 }
202 if (pcmk_is_set(child->flags, child_respawn)) {
203 crm_err("%s[%lld] terminated", child->name, pid);
204 } else {
205
206 crm_notice("%s[%lld] terminated", child->name, pid);
207 }
208 pcmk_process_exit(child);
209 break;
210 default:
211 crm_exit(CRM_EX_FATAL);
212 break;
213 }
214
215 if (++next_child >= PCMK__NELEM(pcmk_children)) {
216 next_child = 0;
217 }
218
219 return G_SOURCE_CONTINUE;
220 }
221
222 static gboolean
223 escalate_shutdown(gpointer data)
224 {
225 pcmk_child_t *child = data;
226
227 if (child->pid == PCMK__SPECIAL_PID) {
228 pcmk_process_exit(child);
229
230 } else if (child->pid != 0) {
231
232 crm_err("Child %s not terminating in a timely manner, forcing", child->name);
233 stop_child(child, SIGSEGV);
234 }
235 return FALSE;
236 }
237
238 static void
239 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
240 {
241 pcmk_child_t *child = mainloop_child_userdata(p);
242 const char *name = mainloop_child_name(p);
243
244 if (signo) {
245 do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
246 "%s[%d] terminated with signal %d (%s)%s",
247 name, pid, signo, strsignal(signo),
248 (core? " and dumped core" : ""));
249
250 } else {
251 switch(exitcode) {
252 case CRM_EX_OK:
253 crm_info("%s[%d] exited with status %d (%s)",
254 name, pid, exitcode, crm_exit_str(exitcode));
255 break;
256
257 case CRM_EX_FATAL:
258 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
259 name, pid);
260 child->flags &= ~child_respawn;
261 fatal_error = TRUE;
262 pcmk_shutdown(SIGTERM);
263 break;
264
265 case CRM_EX_PANIC:
266 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
267 child->flags &= ~child_respawn;
268 fatal_error = TRUE;
269 pcmk__panic(__func__);
270 pcmk_shutdown(SIGTERM);
271 break;
272
273 default:
274 crm_err("%s[%d] exited with status %d (%s)",
275 name, pid, exitcode, crm_exit_str(exitcode));
276 break;
277 }
278 }
279
280 pcmk_process_exit(child);
281 }
282
283 static void
284 pcmk_process_exit(pcmk_child_t * child)
285 {
286 child->pid = 0;
287 child->flags &= ~child_active_before_startup;
288 child->check_count = 0;
289
290 child->respawn_count += 1;
291 if (child->respawn_count > MAX_RESPAWN) {
292 crm_err("Child respawn count exceeded by %s", child->name);
293 child->flags &= ~child_respawn;
294 }
295
296 if (shutdown_trigger) {
297
298 mainloop_set_trigger(shutdown_trigger);
299
300 } else if (!pcmk_is_set(child->flags, child_respawn)) {
301
302
303 } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
304 crm_err("Rebooting system because of %s", child->name);
305 pcmk__panic(__func__);
306
307 } else if (child_liveness(child) == pcmk_rc_ok) {
308 crm_warn("One-off suppressing strict respawning of a child process %s,"
309 " appears alright per %s IPC end-point",
310 child->name, child->endpoint);
311
312 } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
313 crm_notice("Not respawning %s subdaemon until cluster returns",
314 child->name);
315 child->flags |= child_needs_retry;
316
317 } else {
318 crm_notice("Respawning %s subdaemon after unexpected exit",
319 child->name);
320 start_child(child);
321 }
322 }
323
324 static gboolean
325 pcmk_shutdown_worker(gpointer user_data)
326 {
327 static int phase = PCMK__NELEM(pcmk_children) - 1;
328 static time_t next_log = 0;
329
330 if (phase == PCMK__NELEM(pcmk_children) - 1) {
331 crm_notice("Shutting down Pacemaker");
332 pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
333 }
334
335 for (; phase >= 0; phase--) {
336 pcmk_child_t *child = &(pcmk_children[phase]);
337
338 if (child->pid != 0) {
339 time_t now = time(NULL);
340
341 if (pcmk_is_set(child->flags, child_respawn)) {
342 if (child->pid == PCMK__SPECIAL_PID) {
343 crm_warn("The process behind %s IPC cannot be"
344 " terminated, so either wait the graceful"
345 " period of %ld s for its native termination"
346 " if it vitally depends on some other daemons"
347 " going down in a controlled way already,"
348 " or locate and kill the correct %s process"
349 " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1"
350 " to avoid this altogether next time around",
351 child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
352 child->command);
353 }
354 next_log = now + 30;
355 child->flags &= ~child_respawn;
356 stop_child(child, SIGTERM);
357 if (phase < PCMK_CHILD_CONTROLD) {
358 g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
359 escalate_shutdown, child);
360 }
361
362 } else if (now >= next_log) {
363 next_log = now + 30;
364 crm_notice("Still waiting for %s to terminate "
365 CRM_XS " pid=%lld",
366 child->name, (long long) child->pid);
367 }
368 return TRUE;
369 }
370
371
372 crm_debug("%s confirmed stopped", child->name);
373 child->pid = 0;
374 }
375
376 crm_notice("Shutdown complete");
377 pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
378 if (!fatal_error && running_with_sbd &&
379 pcmk__get_sbd_sync_resource_startup() &&
380 !shutdown_complete_state_reported_client_closed) {
381 crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
382 return TRUE;
383 }
384
385
386 {
387 const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
388 if(delay) {
389 long long delay_ms = crm_get_msec(delay);
390
391 sync();
392 if (delay_ms > 0) {
393 pcmk__sleep_ms((unsigned int) QB_MIN(delay_ms, UINT_MAX));
394 }
395 }
396 }
397
398 g_main_loop_quit(mainloop);
399
400 if (fatal_error) {
401 crm_notice("Shutting down and staying down after fatal error");
402 #ifdef SUPPORT_COROSYNC
403 pcmkd_shutdown_corosync();
404 #endif
405 crm_exit(CRM_EX_FATAL);
406 }
407
408 return TRUE;
409 }
410
411
412
413
414
415
416
417 static int
418 start_child(pcmk_child_t * child)
419 {
420 uid_t uid = 0;
421 gid_t gid = 0;
422 gboolean use_valgrind = FALSE;
423 gboolean use_callgrind = FALSE;
424 const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
425 const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
426
427 child->flags &= ~child_active_before_startup;
428 child->check_count = 0;
429
430 if (child->command == NULL) {
431 crm_info("Nothing to do for child \"%s\"", child->name);
432 return pcmk_rc_ok;
433 }
434
435 if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
436 use_callgrind = TRUE;
437 use_valgrind = TRUE;
438
439 } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
440 use_callgrind = TRUE;
441 use_valgrind = TRUE;
442
443 } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
444 use_valgrind = TRUE;
445
446 } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
447 use_valgrind = TRUE;
448 }
449
450 if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
451 crm_warn("Cannot enable valgrind for %s:"
452 " The location of the valgrind binary is unknown", child->name);
453 use_valgrind = FALSE;
454 }
455
456 if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) {
457 crm_err("Invalid user (%s) for subdaemon %s: not found",
458 child->uid, child->name);
459 return EACCES;
460 }
461
462 child->pid = fork();
463 pcmk__assert(child->pid != -1);
464
465 if (child->pid > 0) {
466
467 mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
468
469 crm_info("Forked process %lld using user %lu (%s) and group %lu "
470 "for subdaemon %s%s",
471 (long long) child->pid, (unsigned long) uid,
472 pcmk__s(child->uid, "root"), (unsigned long) gid, child->name,
473 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
474 return pcmk_rc_ok;
475
476 } else {
477
478 (void)setsid();
479
480
481 opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
482 if (use_callgrind) {
483 opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
484 opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
485 CRM_STATE_DIR "/callgrind.out.%p");
486 opts_vgrind[3] = pcmk__str_copy(child->command);
487 opts_vgrind[4] = NULL;
488 } else {
489 opts_vgrind[1] = pcmk__str_copy(child->command);
490 opts_vgrind[2] = NULL;
491 opts_vgrind[3] = NULL;
492 opts_vgrind[4] = NULL;
493 }
494 opts_default[0] = pcmk__str_copy(child->command);
495
496 if(gid) {
497
498 if (!need_root_group && (setgid(gid) < 0)) {
499 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
500 }
501
502
503
504
505 if (initgroups(child->uid, gid) < 0) {
506 crm_err("Cannot initialize groups for %s: %s (%d)",
507 child->uid, pcmk_rc_str(errno), errno);
508 }
509 }
510
511 if (uid && setuid(uid) < 0) {
512 crm_warn("Could not set user to %s (id %d): %s",
513 child->uid, uid, strerror(errno));
514 }
515
516 pcmk__close_fds_in_child(true);
517
518 pcmk__open_devnull(O_RDONLY);
519 pcmk__open_devnull(O_WRONLY);
520 pcmk__open_devnull(O_WRONLY);
521
522 if (use_valgrind) {
523 (void)execvp(VALGRIND_BIN, opts_vgrind);
524 } else {
525 (void)execvp(child->command, opts_default);
526 }
527 crm_crit("Could not execute %s: %s", child->command, strerror(errno));
528 crm_exit(CRM_EX_FATAL);
529 }
530 return pcmk_rc_ok;
531 }
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551 static int
552 child_liveness(pcmk_child_t *child)
553 {
554 uid_t cl_uid = 0;
555 gid_t cl_gid = 0;
556 const uid_t root_uid = 0;
557 const gid_t root_gid = 0;
558 const uid_t *ref_uid;
559 const gid_t *ref_gid;
560 int rc = pcmk_rc_ipc_unresponsive;
561 pid_t ipc_pid = 0;
562
563 if (child->endpoint == NULL
564 && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
565 crm_err("Cannot track child %s for missing both API end-point and PID",
566 child->name);
567 rc = EINVAL;
568
569 } else if (child->endpoint != NULL) {
570 int legacy_rc = pcmk_ok;
571
572 if (child->uid == NULL) {
573 ref_uid = &root_uid;
574 ref_gid = &root_gid;
575 } else {
576 ref_uid = &cl_uid;
577 ref_gid = &cl_gid;
578 legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
579 }
580
581 if (legacy_rc < 0) {
582 rc = pcmk_legacy2rc(legacy_rc);
583 crm_err("Could not find user and group IDs for user %s: %s "
584 CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
585 } else {
586 rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
587 *ref_uid, *ref_gid,
588 &ipc_pid);
589 if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
590 if (child->pid <= 0) {
591
592
593
594
595
596 child->pid = ipc_pid;
597 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
598
599
600
601 rc = pcmk_rc_ipc_unresponsive;
602 }
603 }
604 }
605 }
606
607 if (rc == pcmk_rc_ipc_unresponsive) {
608
609
610
611
612
613
614 int ret = pcmk__pid_active(child->pid, child->name);
615
616 if (ipc_pid && ((ret != pcmk_rc_ok)
617 || ipc_pid == PCMK__SPECIAL_PID
618 || (pcmk__pid_active(ipc_pid,
619 child->name) == pcmk_rc_ok))) {
620
621
622
623
624
625 if (ret == pcmk_rc_ok) {
626
627
628
629
630
631
632
633
634 stop_child(child, SIGKILL);
635 }
636 rc = pcmk_rc_ok;
637 child->pid = ipc_pid;
638 } else if (ret == pcmk_rc_ok) {
639
640 rc = pcmk_rc_ipc_pid_only;
641 } else if ((child->pid == 0) && (ret == EINVAL)) {
642
643 rc = pcmk_rc_ipc_unresponsive;
644 } else {
645 switch (ret) {
646 case EACCES:
647 rc = pcmk_rc_ipc_unauthorized;
648 break;
649 case ESRCH:
650 rc = pcmk_rc_ipc_unresponsive;
651 break;
652 default:
653 rc = ret;
654 break;
655 }
656 }
657 }
658 return rc;
659 }
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688 #define WAIT_TRIES 4
689 int
690 find_and_track_existing_processes(void)
691 {
692 bool wait_in_progress;
693 int rc;
694 size_t i, rounds;
695
696 for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
697 wait_in_progress = false;
698 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
699
700 if ((pcmk_children[i].endpoint == NULL)
701 || (pcmk_children[i].respawn_count < 0)) {
702 continue;
703 }
704
705 rc = child_liveness(&pcmk_children[i]);
706 if (rc == pcmk_rc_ipc_unresponsive) {
707
708
709
710
711 continue;
712 }
713
714
715 pcmk_children[i].respawn_count = rounds;
716 switch (rc) {
717 case pcmk_rc_ok:
718 if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
719 if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
720 crm_crit("Cannot reliably track pre-existing"
721 " authentic process behind %s IPC on this"
722 " platform and PCMK_" PCMK__ENV_FAIL_FAST
723 " requested",
724 pcmk_children[i].endpoint);
725 return EOPNOTSUPP;
726 } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
727 crm_notice("Assuming pre-existing authentic, though"
728 " on this platform untrackable, process"
729 " behind %s IPC is stable (was in %d"
730 " previous samples) so rather than"
731 " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
732 " not requested), we just switch to a"
733 " less optimal IPC liveness monitoring"
734 " (not very suitable for heavy load)",
735 pcmk_children[i].name, WAIT_TRIES - 1);
736 crm_warn("The process behind %s IPC cannot be"
737 " terminated, so the overall shutdown"
738 " will get delayed implicitly (%ld s),"
739 " which serves as a graceful period for"
740 " its native termination if it vitally"
741 " depends on some other daemons going"
742 " down in a controlled way already",
743 pcmk_children[i].name,
744 (long) SHUTDOWN_ESCALATION_PERIOD);
745 } else {
746 wait_in_progress = true;
747 crm_warn("Cannot reliably track pre-existing"
748 " authentic process behind %s IPC on this"
749 " platform, can still disappear in %d"
750 " attempt(s)", pcmk_children[i].endpoint,
751 WAIT_TRIES - pcmk_children[i].respawn_count);
752 continue;
753 }
754 }
755 crm_notice("Tracking existing %s process (pid=%lld)",
756 pcmk_children[i].name,
757 (long long) PCMK__SPECIAL_PID_AS_0(
758 pcmk_children[i].pid));
759 pcmk_children[i].respawn_count = -1;
760 pcmk_children[i].flags |= child_active_before_startup;
761 break;
762 case pcmk_rc_ipc_pid_only:
763 if (pcmk_children[i].respawn_count == WAIT_TRIES) {
764 crm_crit("%s IPC end-point for existing authentic"
765 " process %lld did not (re)appear",
766 pcmk_children[i].endpoint,
767 (long long) PCMK__SPECIAL_PID_AS_0(
768 pcmk_children[i].pid));
769 return rc;
770 }
771 wait_in_progress = true;
772 crm_warn("Cannot find %s IPC end-point for existing"
773 " authentic process %lld, can still (re)appear"
774 " in %d attempts (?)",
775 pcmk_children[i].endpoint,
776 (long long) PCMK__SPECIAL_PID_AS_0(
777 pcmk_children[i].pid),
778 WAIT_TRIES - pcmk_children[i].respawn_count);
779 continue;
780 default:
781 crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
782 pcmk_children[i].name, pcmk_rc_str(rc), rc);
783 return rc;
784 }
785 }
786 if (!wait_in_progress) {
787 break;
788 }
789 pcmk__sleep_ms(250);
790 }
791 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
792 pcmk_children[i].respawn_count = 0;
793 }
794
795 g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
796 NULL);
797 return pcmk_rc_ok;
798 }
799
800 gboolean
801 init_children_processes(void *user_data)
802 {
803 if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
804
805
806
807
808 need_root_group = false;
809 }
810
811
812 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
813 if (pcmk_children[i].pid != 0) {
814
815 continue;
816 }
817
818 start_child(&(pcmk_children[i]));
819 }
820
821
822
823
824
825
826 pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
827 pacemakerd_state = PCMK__VALUE_RUNNING;
828 return TRUE;
829 }
830
831 void
832 pcmk_shutdown(int nsig)
833 {
834 if (shutdown_trigger == NULL) {
835 shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
836 }
837 mainloop_set_trigger(shutdown_trigger);
838 }
839
840 void
841 restart_cluster_subdaemons(void)
842 {
843 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
844 if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
845 continue;
846 }
847
848 crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
849 if (start_child(&pcmk_children[i])) {
850 pcmk_children[i].flags &= ~child_needs_retry;
851 }
852 }
853 }
854
855 static gboolean
856 stop_child(pcmk_child_t * child, int signal)
857 {
858 if (signal == 0) {
859 signal = SIGTERM;
860 }
861
862
863
864
865
866 if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
867 crm_debug("Nothing to do for child \"%s\" (process %lld)",
868 child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
869 return TRUE;
870 }
871
872 if (child->pid <= 0) {
873 crm_trace("Client %s not running", child->name);
874 return TRUE;
875 }
876
877 errno = 0;
878 if (kill(child->pid, signal) == 0) {
879 crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
880 child->name, signal, (long long) child->pid);
881
882 } else {
883 crm_err("Could not stop %s (process %lld) with signal %d: %s",
884 child->name, (long long) child->pid, signal, strerror(errno));
885 }
886
887 return TRUE;
888 }
889