This source file includes the following definitions.
- pcmkd_cluster_connected
- check_next_subdaemon
- escalate_shutdown
- pcmk_child_exit
- pcmk_process_exit
- pcmk_shutdown_worker
- start_child
- child_liveness
- find_and_track_existing_processes
- init_children_processes
- pcmk_shutdown
- restart_cluster_subdaemons
- stop_child
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include "pacemakerd.h"
12
13 #if SUPPORT_COROSYNC
14 #include "pcmkd_corosync.h"
15 #endif
16
17 #include <errno.h>
18 #include <grp.h>
19 #include <signal.h>
20 #include <stdbool.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sys/types.h>
24 #include <time.h>
25 #include <unistd.h>
26
27 #include <crm/cluster.h>
28 #include <crm/common/xml.h>
29
/* Behavior and status bits kept in the "flags" member of pcmk_child_t */
enum child_daemon_flags {
    /* No flag set */
    child_none                  = 0,

    /* Subdaemon should be started again after an unexpected exit; cleared in
     * this file on shutdown, on fatal/panic exit codes, and once the respawn
     * count exceeds MAX_RESPAWN
     */
    child_respawn               = (1 << 0),

    /* Respawning is postponed while the cluster layer is not connected */
    child_needs_cluster         = (1 << 1),

    /* A respawn was postponed; checked by restart_cluster_subdaemons() */
    child_needs_retry           = (1 << 2),

    /* Process was already running when found by
     * find_and_track_existing_processes(), i.e. it was not forked by us
     */
    child_active_before_startup = (1 << 3),
};
37
/* Bookkeeping for one subdaemon managed by pacemakerd.
 *
 * The member order matters: tables of this type may be initialized
 * positionally.
 */
struct pcmk_child_s {
    pid_t pid;              /* 0 when no process is tracked; may also hold
                             * PCMK__SPECIAL_PID */
    int respawn_count;      /* incremented on every recorded exit; also used
                             * as a round counter while looking for existing
                             * processes at startup */
    const char *name;       /* subdaemon name, used in log messages and for
                             * PID checks */
    const char *uid;        /* user name to run as (NULL: keep current user) */
    const char *command;    /* path of the executable passed to execvp() */
    const char *endpoint;   /* IPC end-point name used for liveness checks */
    int check_count;        /* consecutive checks with PID alive but IPC
                             * unresponsive */
    uint32_t flags;         /* group of enum child_daemon_flags */
};

typedef struct pcmk_child_s pcmk_child_t;
48
/* Period of the subdaemon liveness check, in seconds (it is handed to
 * g_timeout_add_seconds())
 */
#define PCMK_PROCESS_CHECK_INTERVAL 1

/* Number of consecutive "PID alive but IPC unresponsive" results after which
 * a subdaemon is killed
 */
#define PCMK_PROCESS_CHECK_RETRIES  5

/* Delay before escalate_shutdown() fires, in milliseconds (it is handed to
 * g_timeout_add())
 */
#define SHUTDOWN_ESCALATION_PERIOD  180000


/* Index of pacemaker-controld in the pcmk_children table */
#define PCMK_CHILD_CONTROLD         5
55
56 static pcmk_child_t pcmk_children[] = {
57 {
58 0, 0, "pacemaker-based", CRM_DAEMON_USER,
59 CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
60 0, child_respawn | child_needs_cluster
61 },
62 {
63 0, 0, "pacemaker-fenced", NULL,
64 CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
65 0, child_respawn | child_needs_cluster
66 },
67 {
68 0, 0, "pacemaker-execd", NULL,
69 CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
70 0, child_respawn
71 },
72 {
73 0, 0, "pacemaker-attrd", CRM_DAEMON_USER,
74 CRM_DAEMON_DIR "/pacemaker-attrd", PCMK__VALUE_ATTRD,
75 0, child_respawn | child_needs_cluster
76 },
77 {
78 0, 0, "pacemaker-schedulerd", CRM_DAEMON_USER,
79 CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
80 0, child_respawn
81 },
82 {
83 0, 0, "pacemaker-controld", CRM_DAEMON_USER,
84 CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
85 0, child_respawn | child_needs_cluster
86 },
87 };
88
89 static char *opts_default[] = { NULL, NULL };
90 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
91
92 crm_trigger_t *shutdown_trigger = NULL;
93 crm_trigger_t *startup_trigger = NULL;
94 time_t subdaemon_check_progress = 0;
95
96
97 static bool need_root_group = true;
98
99
100
101
102
103
104
105
106
107
108
109 unsigned int shutdown_complete_state_reported_to = 0;
110 gboolean shutdown_complete_state_reported_client_closed = FALSE;
111
112
113 const char *pacemakerd_state = PCMK__VALUE_INIT;
114 gboolean running_with_sbd = FALSE;
115
116 GMainLoop *mainloop = NULL;
117
118 static gboolean fatal_error = FALSE;
119
120 static int child_liveness(pcmk_child_t *child);
121 static gboolean escalate_shutdown(gpointer data);
122 static int start_child(pcmk_child_t * child);
123 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
124 static void pcmk_process_exit(pcmk_child_t * child);
125 static gboolean pcmk_shutdown_worker(gpointer user_data);
126 static gboolean stop_child(pcmk_child_t * child, int signal);
127
/*!
 * \internal
 * \brief Check whether the cluster layer is currently connected
 *
 * \return Result of pcmkd_corosync_connected() when built with Corosync
 *         support, otherwise always true
 */
static bool
pcmkd_cluster_connected(void)
{
    bool connected = true;

#if SUPPORT_COROSYNC
    connected = pcmkd_corosync_connected();
#endif
    return connected;
}
137
138 static gboolean
139 check_next_subdaemon(gpointer user_data)
140 {
141 static int next_child = 0;
142
143 pcmk_child_t *child = &(pcmk_children[next_child]);
144 const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
145 int rc = child_liveness(child);
146
147 crm_trace("Checked %s[%lld]: %s (%d)",
148 child->name, pid, pcmk_rc_str(rc), rc);
149
150 switch (rc) {
151 case pcmk_rc_ok:
152 child->check_count = 0;
153 subdaemon_check_progress = time(NULL);
154 break;
155
156 case pcmk_rc_ipc_pid_only:
157 if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
158 crm_crit("%s[%lld] is unresponsive to IPC after %d attempt%s "
159 "and will now be killed",
160 child->name, pid, child->check_count,
161 pcmk__plural_s(child->check_count));
162 stop_child(child, SIGKILL);
163 if (pcmk_is_set(child->flags, child_respawn)) {
164
165 child->check_count = 0;
166 }
167 } else {
168 crm_notice("%s[%lld] is unresponsive to IPC after %d attempt%s",
169 child->name, pid, child->check_count,
170 pcmk__plural_s(child->check_count));
171 if (pcmk_is_set(child->flags, child_respawn)) {
172
173
174
175
176
177 subdaemon_check_progress = time(NULL);
178 }
179 }
180
181
182
183 break;
184 case pcmk_rc_ipc_unresponsive:
185 if (!pcmk_is_set(child->flags, child_respawn)) {
186
187
188
189
190
191
192 if (child->respawn_count <= MAX_RESPAWN) {
193 subdaemon_check_progress = time(NULL);
194 }
195 }
196 if (!pcmk_is_set(child->flags, child_active_before_startup)) {
197 crm_trace("%s[%lld] terminated (relying on SIGCHLD handler)",
198 child->name, pid);
199 break;
200 }
201 if (pcmk_is_set(child->flags, child_respawn)) {
202 crm_err("%s[%lld] terminated", child->name, pid);
203 } else {
204
205 crm_notice("%s[%lld] terminated", child->name, pid);
206 }
207 pcmk_process_exit(child);
208 break;
209 default:
210 crm_exit(CRM_EX_FATAL);
211 break;
212 }
213
214 if (++next_child >= PCMK__NELEM(pcmk_children)) {
215 next_child = 0;
216 }
217
218 return G_SOURCE_CONTINUE;
219 }
220
221 static gboolean
222 escalate_shutdown(gpointer data)
223 {
224 pcmk_child_t *child = data;
225
226 if (child->pid == PCMK__SPECIAL_PID) {
227 pcmk_process_exit(child);
228
229 } else if (child->pid != 0) {
230
231 crm_err("Child %s not terminating in a timely manner, forcing", child->name);
232 stop_child(child, SIGSEGV);
233 }
234 return FALSE;
235 }
236
237 static void
238 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
239 {
240 pcmk_child_t *child = mainloop_child_userdata(p);
241 const char *name = mainloop_child_name(p);
242
243 if (signo) {
244 do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
245 "%s[%d] terminated with signal %d (%s)%s",
246 name, pid, signo, strsignal(signo),
247 (core? " and dumped core" : ""));
248
249 } else {
250 switch(exitcode) {
251 case CRM_EX_OK:
252 crm_info("%s[%d] exited with status %d (%s)",
253 name, pid, exitcode, crm_exit_str(exitcode));
254 break;
255
256 case CRM_EX_FATAL:
257 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
258 name, pid);
259 child->flags &= ~child_respawn;
260 fatal_error = TRUE;
261 pcmk_shutdown(SIGTERM);
262 break;
263
264 case CRM_EX_PANIC:
265 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
266 child->flags &= ~child_respawn;
267 fatal_error = TRUE;
268 pcmk__panic(__func__);
269 pcmk_shutdown(SIGTERM);
270 break;
271
272 default:
273 crm_err("%s[%d] exited with status %d (%s)",
274 name, pid, exitcode, crm_exit_str(exitcode));
275 break;
276 }
277 }
278
279 pcmk_process_exit(child);
280 }
281
282 static void
283 pcmk_process_exit(pcmk_child_t * child)
284 {
285 child->pid = 0;
286 child->flags &= ~child_active_before_startup;
287 child->check_count = 0;
288
289 child->respawn_count += 1;
290 if (child->respawn_count > MAX_RESPAWN) {
291 crm_err("Child respawn count exceeded by %s", child->name);
292 child->flags &= ~child_respawn;
293 }
294
295 if (shutdown_trigger) {
296
297 mainloop_set_trigger(shutdown_trigger);
298
299 } else if (!pcmk_is_set(child->flags, child_respawn)) {
300
301
302 } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
303 crm_err("Rebooting system because of %s", child->name);
304 pcmk__panic(__func__);
305
306 } else if (child_liveness(child) == pcmk_rc_ok) {
307 crm_warn("One-off suppressing strict respawning of a child process %s,"
308 " appears alright per %s IPC end-point",
309 child->name, child->endpoint);
310
311 } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
312 crm_notice("Not respawning %s subdaemon until cluster returns",
313 child->name);
314 child->flags |= child_needs_retry;
315
316 } else {
317 crm_notice("Respawning %s subdaemon after unexpected exit",
318 child->name);
319 start_child(child);
320 }
321 }
322
323 static gboolean
324 pcmk_shutdown_worker(gpointer user_data)
325 {
326 static int phase = PCMK__NELEM(pcmk_children) - 1;
327 static time_t next_log = 0;
328
329 if (phase == PCMK__NELEM(pcmk_children) - 1) {
330 crm_notice("Shutting down Pacemaker");
331 pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
332 }
333
334 for (; phase >= 0; phase--) {
335 pcmk_child_t *child = &(pcmk_children[phase]);
336
337 if (child->pid != 0) {
338 time_t now = time(NULL);
339
340 if (pcmk_is_set(child->flags, child_respawn)) {
341 if (child->pid == PCMK__SPECIAL_PID) {
342 crm_warn("The process behind %s IPC cannot be"
343 " terminated, so either wait the graceful"
344 " period of %ld s for its native termination"
345 " if it vitally depends on some other daemons"
346 " going down in a controlled way already,"
347 " or locate and kill the correct %s process"
348 " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1"
349 " to avoid this altogether next time around",
350 child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
351 child->command);
352 }
353 next_log = now + 30;
354 child->flags &= ~child_respawn;
355 stop_child(child, SIGTERM);
356 if (phase < PCMK_CHILD_CONTROLD) {
357 g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
358 escalate_shutdown, child);
359 }
360
361 } else if (now >= next_log) {
362 next_log = now + 30;
363 crm_notice("Still waiting for %s to terminate "
364 CRM_XS " pid=%lld",
365 child->name, (long long) child->pid);
366 }
367 return TRUE;
368 }
369
370
371 crm_debug("%s confirmed stopped", child->name);
372 child->pid = 0;
373 }
374
375 crm_notice("Shutdown complete");
376 pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
377 if (!fatal_error && running_with_sbd &&
378 pcmk__get_sbd_sync_resource_startup() &&
379 !shutdown_complete_state_reported_client_closed) {
380 crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
381 return TRUE;
382 }
383
384
385 {
386 const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
387 if(delay) {
388 long long delay_ms = crm_get_msec(delay);
389
390 sync();
391 if (delay_ms > 0) {
392 pcmk__sleep_ms((unsigned int) QB_MIN(delay_ms, UINT_MAX));
393 }
394 }
395 }
396
397 g_main_loop_quit(mainloop);
398
399 if (fatal_error) {
400 crm_notice("Shutting down and staying down after fatal error");
401 #ifdef SUPPORT_COROSYNC
402 pcmkd_shutdown_corosync();
403 #endif
404 crm_exit(CRM_EX_FATAL);
405 }
406
407 return TRUE;
408 }
409
410
411
412
413
414
415
416 static int
417 start_child(pcmk_child_t * child)
418 {
419 uid_t uid = 0;
420 gid_t gid = 0;
421 gboolean use_valgrind = FALSE;
422 gboolean use_callgrind = FALSE;
423 const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
424 const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
425
426 child->flags &= ~child_active_before_startup;
427 child->check_count = 0;
428
429 if (child->command == NULL) {
430 crm_info("Nothing to do for child \"%s\"", child->name);
431 return pcmk_rc_ok;
432 }
433
434 if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
435 use_callgrind = TRUE;
436 use_valgrind = TRUE;
437
438 } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
439 use_callgrind = TRUE;
440 use_valgrind = TRUE;
441
442 } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
443 use_valgrind = TRUE;
444
445 } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
446 use_valgrind = TRUE;
447 }
448
449 if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
450 crm_warn("Cannot enable valgrind for %s:"
451 " The location of the valgrind binary is unknown", child->name);
452 use_valgrind = FALSE;
453 }
454
455 if (child->uid) {
456 if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
457 crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
458 return EACCES;
459 }
460 crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
461 }
462
463 child->pid = fork();
464 CRM_ASSERT(child->pid != -1);
465
466 if (child->pid > 0) {
467
468 mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
469
470 crm_info("Forked child %lld for process %s%s",
471 (long long) child->pid, child->name,
472 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
473 return pcmk_rc_ok;
474
475 } else {
476
477 (void)setsid();
478
479
480 opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
481 if (use_callgrind) {
482 opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
483 opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
484 CRM_STATE_DIR "/callgrind.out.%p");
485 opts_vgrind[3] = pcmk__str_copy(child->command);
486 opts_vgrind[4] = NULL;
487 } else {
488 opts_vgrind[1] = pcmk__str_copy(child->command);
489 opts_vgrind[2] = NULL;
490 opts_vgrind[3] = NULL;
491 opts_vgrind[4] = NULL;
492 }
493 opts_default[0] = pcmk__str_copy(child->command);
494
495 if(gid) {
496
497 if (!need_root_group && (setgid(gid) < 0)) {
498 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
499 }
500
501
502
503
504 if (initgroups(child->uid, gid) < 0) {
505 crm_err("Cannot initialize groups for %s: %s (%d)",
506 child->uid, pcmk_rc_str(errno), errno);
507 }
508 }
509
510 if (uid && setuid(uid) < 0) {
511 crm_warn("Could not set user to %s (id %d): %s",
512 child->uid, uid, strerror(errno));
513 }
514
515 pcmk__close_fds_in_child(true);
516
517 pcmk__open_devnull(O_RDONLY);
518 pcmk__open_devnull(O_WRONLY);
519 pcmk__open_devnull(O_WRONLY);
520
521 if (use_valgrind) {
522 (void)execvp(VALGRIND_BIN, opts_vgrind);
523 } else {
524 (void)execvp(child->command, opts_default);
525 }
526 crm_crit("Could not execute %s: %s", child->command, strerror(errno));
527 crm_exit(CRM_EX_FATAL);
528 }
529 return pcmk_rc_ok;
530 }
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550 static int
551 child_liveness(pcmk_child_t *child)
552 {
553 uid_t cl_uid = 0;
554 gid_t cl_gid = 0;
555 const uid_t root_uid = 0;
556 const gid_t root_gid = 0;
557 const uid_t *ref_uid;
558 const gid_t *ref_gid;
559 int rc = pcmk_rc_ipc_unresponsive;
560 pid_t ipc_pid = 0;
561
562 if (child->endpoint == NULL
563 && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
564 crm_err("Cannot track child %s for missing both API end-point and PID",
565 child->name);
566 rc = EINVAL;
567
568 } else if (child->endpoint != NULL) {
569 int legacy_rc = pcmk_ok;
570
571 if (child->uid == NULL) {
572 ref_uid = &root_uid;
573 ref_gid = &root_gid;
574 } else {
575 ref_uid = &cl_uid;
576 ref_gid = &cl_gid;
577 legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
578 }
579
580 if (legacy_rc < 0) {
581 rc = pcmk_legacy2rc(legacy_rc);
582 crm_err("Could not find user and group IDs for user %s: %s "
583 CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
584 } else {
585 rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
586 *ref_uid, *ref_gid,
587 &ipc_pid);
588 if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
589 if (child->pid <= 0) {
590
591
592
593
594
595 child->pid = ipc_pid;
596 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
597
598
599
600 rc = pcmk_rc_ipc_unresponsive;
601 }
602 }
603 }
604 }
605
606 if (rc == pcmk_rc_ipc_unresponsive) {
607
608
609
610
611
612
613 int ret = pcmk__pid_active(child->pid, child->name);
614
615 if (ipc_pid && ((ret != pcmk_rc_ok)
616 || ipc_pid == PCMK__SPECIAL_PID
617 || (pcmk__pid_active(ipc_pid,
618 child->name) == pcmk_rc_ok))) {
619
620
621
622
623
624 if (ret == pcmk_rc_ok) {
625
626
627
628
629
630
631
632
633 stop_child(child, SIGKILL);
634 }
635 rc = pcmk_rc_ok;
636 child->pid = ipc_pid;
637 } else if (ret == pcmk_rc_ok) {
638
639 rc = pcmk_rc_ipc_pid_only;
640 } else if ((child->pid == 0) && (ret == EINVAL)) {
641
642 rc = pcmk_rc_ipc_unresponsive;
643 } else {
644 switch (ret) {
645 case EACCES:
646 rc = pcmk_rc_ipc_unauthorized;
647 break;
648 case ESRCH:
649 rc = pcmk_rc_ipc_unresponsive;
650 break;
651 default:
652 rc = ret;
653 break;
654 }
655 }
656 }
657 return rc;
658 }
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687 #define WAIT_TRIES 4
688 int
689 find_and_track_existing_processes(void)
690 {
691 bool wait_in_progress;
692 int rc;
693 size_t i, rounds;
694
695 for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
696 wait_in_progress = false;
697 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
698
699 if ((pcmk_children[i].endpoint == NULL)
700 || (pcmk_children[i].respawn_count < 0)) {
701 continue;
702 }
703
704 rc = child_liveness(&pcmk_children[i]);
705 if (rc == pcmk_rc_ipc_unresponsive) {
706
707
708
709
710 continue;
711 }
712
713
714 pcmk_children[i].respawn_count = rounds;
715 switch (rc) {
716 case pcmk_rc_ok:
717 if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
718 if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
719 crm_crit("Cannot reliably track pre-existing"
720 " authentic process behind %s IPC on this"
721 " platform and PCMK_" PCMK__ENV_FAIL_FAST
722 " requested",
723 pcmk_children[i].endpoint);
724 return EOPNOTSUPP;
725 } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
726 crm_notice("Assuming pre-existing authentic, though"
727 " on this platform untrackable, process"
728 " behind %s IPC is stable (was in %d"
729 " previous samples) so rather than"
730 " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
731 " not requested), we just switch to a"
732 " less optimal IPC liveness monitoring"
733 " (not very suitable for heavy load)",
734 pcmk_children[i].name, WAIT_TRIES - 1);
735 crm_warn("The process behind %s IPC cannot be"
736 " terminated, so the overall shutdown"
737 " will get delayed implicitly (%ld s),"
738 " which serves as a graceful period for"
739 " its native termination if it vitally"
740 " depends on some other daemons going"
741 " down in a controlled way already",
742 pcmk_children[i].name,
743 (long) SHUTDOWN_ESCALATION_PERIOD);
744 } else {
745 wait_in_progress = true;
746 crm_warn("Cannot reliably track pre-existing"
747 " authentic process behind %s IPC on this"
748 " platform, can still disappear in %d"
749 " attempt(s)", pcmk_children[i].endpoint,
750 WAIT_TRIES - pcmk_children[i].respawn_count);
751 continue;
752 }
753 }
754 crm_notice("Tracking existing %s process (pid=%lld)",
755 pcmk_children[i].name,
756 (long long) PCMK__SPECIAL_PID_AS_0(
757 pcmk_children[i].pid));
758 pcmk_children[i].respawn_count = -1;
759 pcmk_children[i].flags |= child_active_before_startup;
760 break;
761 case pcmk_rc_ipc_pid_only:
762 if (pcmk_children[i].respawn_count == WAIT_TRIES) {
763 crm_crit("%s IPC end-point for existing authentic"
764 " process %lld did not (re)appear",
765 pcmk_children[i].endpoint,
766 (long long) PCMK__SPECIAL_PID_AS_0(
767 pcmk_children[i].pid));
768 return rc;
769 }
770 wait_in_progress = true;
771 crm_warn("Cannot find %s IPC end-point for existing"
772 " authentic process %lld, can still (re)appear"
773 " in %d attempts (?)",
774 pcmk_children[i].endpoint,
775 (long long) PCMK__SPECIAL_PID_AS_0(
776 pcmk_children[i].pid),
777 WAIT_TRIES - pcmk_children[i].respawn_count);
778 continue;
779 default:
780 crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
781 pcmk_children[i].name, pcmk_rc_str(rc), rc);
782 return rc;
783 }
784 }
785 if (!wait_in_progress) {
786 break;
787 }
788 pcmk__sleep_ms(250);
789 }
790 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
791 pcmk_children[i].respawn_count = 0;
792 }
793
794 g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
795 NULL);
796 return pcmk_rc_ok;
797 }
798
799 gboolean
800 init_children_processes(void *user_data)
801 {
802 if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
803
804
805
806
807 need_root_group = false;
808 }
809
810
811 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
812 if (pcmk_children[i].pid != 0) {
813
814 continue;
815 }
816
817 start_child(&(pcmk_children[i]));
818 }
819
820
821
822
823
824
825 pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
826 pacemakerd_state = PCMK__VALUE_RUNNING;
827 return TRUE;
828 }
829
830 void
831 pcmk_shutdown(int nsig)
832 {
833 if (shutdown_trigger == NULL) {
834 shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
835 }
836 mainloop_set_trigger(shutdown_trigger);
837 }
838
839 void
840 restart_cluster_subdaemons(void)
841 {
842 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
843 if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
844 continue;
845 }
846
847 crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
848 if (start_child(&pcmk_children[i])) {
849 pcmk_children[i].flags &= ~child_needs_retry;
850 }
851 }
852 }
853
854 static gboolean
855 stop_child(pcmk_child_t * child, int signal)
856 {
857 if (signal == 0) {
858 signal = SIGTERM;
859 }
860
861
862
863
864
865 if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
866 crm_debug("Nothing to do for child \"%s\" (process %lld)",
867 child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
868 return TRUE;
869 }
870
871 if (child->pid <= 0) {
872 crm_trace("Client %s not running", child->name);
873 return TRUE;
874 }
875
876 errno = 0;
877 if (kill(child->pid, signal) == 0) {
878 crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
879 child->name, signal, (long long) child->pid);
880
881 } else {
882 crm_err("Could not stop %s (process %lld) with signal %d: %s",
883 child->name, (long long) child->pid, signal, strerror(errno));
884 }
885
886 return TRUE;
887 }
888