This source file includes the following definitions.
- pcmkd_cluster_connected
- check_next_subdaemon
- escalate_shutdown
- pcmk_child_exit
- pcmk_process_exit
- pcmk_shutdown_worker
- start_child
- child_liveness
- find_and_track_existing_processes
- init_children_processes
- pcmk_shutdown
- restart_cluster_subdaemons
- stop_child
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include "pacemakerd.h"
12
13 #include <errno.h>
14 #include <grp.h>
15 #include <signal.h>
16 #include <stdbool.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <time.h>
21 #include <unistd.h>
22
23 #include <crm/cluster.h>
24 #include <crm/msg_xml.h>
25
26 typedef struct pcmk_child_s {
27 pid_t pid;
28 int respawn_count;
29 bool respawn;
30 const char *name;
31 const char *uid;
32 const char *command;
33 const char *endpoint;
34 bool needs_cluster;
35 int check_count;
36
37
38 bool needs_retry;
39 bool active_before_startup;
40 } pcmk_child_t;
41
42 #define PCMK_PROCESS_CHECK_INTERVAL 1
43 #define PCMK_PROCESS_CHECK_RETRIES 5
44 #define SHUTDOWN_ESCALATION_PERIOD 180000
45
46
47 #define PCMK_CHILD_CONTROLD 5
48
49 static pcmk_child_t pcmk_children[] = {
50 {
51 0, 0, true, "pacemaker-based", CRM_DAEMON_USER,
52 CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
53 true
54 },
55 {
56 0, 0, true, "pacemaker-fenced", NULL,
57 CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
58 true
59 },
60 {
61 0, 0, true, "pacemaker-execd", NULL,
62 CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
63 false
64 },
65 {
66 0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER,
67 CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD,
68 true
69 },
70 {
71 0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER,
72 CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
73 false
74 },
75 {
76 0, 0, true, "pacemaker-controld", CRM_DAEMON_USER,
77 CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
78 true
79 },
80 };
81
82 static char *opts_default[] = { NULL, NULL };
83 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
84
85 crm_trigger_t *shutdown_trigger = NULL;
86 crm_trigger_t *startup_trigger = NULL;
87 time_t subdaemon_check_progress = 0;
88
89
90 static bool need_root_group = true;
91
92
93
94
95
96
97
98
99
100
101
102 unsigned int shutdown_complete_state_reported_to = 0;
103 gboolean shutdown_complete_state_reported_client_closed = FALSE;
104
105
106 const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
107 gboolean running_with_sbd = FALSE;
108
109 GMainLoop *mainloop = NULL;
110
111 static gboolean fatal_error = FALSE;
112
113 static int child_liveness(pcmk_child_t *child);
114 static gboolean escalate_shutdown(gpointer data);
115 static int start_child(pcmk_child_t * child);
116 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
117 static void pcmk_process_exit(pcmk_child_t * child);
118 static gboolean pcmk_shutdown_worker(gpointer user_data);
119 static gboolean stop_child(pcmk_child_t * child, int signal);
120
121 static bool
122 pcmkd_cluster_connected(void)
123 {
124 #if SUPPORT_COROSYNC
125 return pcmkd_corosync_connected();
126 #else
127 return true;
128 #endif
129 }
130
131 static gboolean
132 check_next_subdaemon(gpointer user_data)
133 {
134 static int next_child = 0;
135 int rc = child_liveness(&pcmk_children[next_child]);
136
137 crm_trace("Checked %s[%lld]: %s (%d)",
138 pcmk_children[next_child].name,
139 (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid),
140 pcmk_rc_str(rc), rc);
141
142 switch (rc) {
143 case pcmk_rc_ok:
144 pcmk_children[next_child].check_count = 0;
145 subdaemon_check_progress = time(NULL);
146 break;
147 case pcmk_rc_ipc_pid_only:
148 pcmk_children[next_child].check_count++;
149 if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) {
150 crm_err("%s[%lld] is unresponsive to ipc after %d tries but "
151 "we found the pid so have it killed that we can restart",
152 pcmk_children[next_child].name,
153 (long long) PCMK__SPECIAL_PID_AS_0(
154 pcmk_children[next_child].pid),
155 pcmk_children[next_child].check_count);
156 stop_child(&pcmk_children[next_child], SIGKILL);
157 if (pcmk_children[next_child].respawn) {
158
159
160
161 pcmk_children[next_child].check_count = 0;
162 }
163 } else {
164 crm_notice("%s[%lld] is unresponsive to ipc after %d tries",
165 pcmk_children[next_child].name,
166 (long long) PCMK__SPECIAL_PID_AS_0(
167 pcmk_children[next_child].pid),
168 pcmk_children[next_child].check_count);
169 if (pcmk_children[next_child].respawn) {
170
171
172
173
174
175 subdaemon_check_progress = time(NULL);
176 }
177 }
178
179
180
181 break;
182 case pcmk_rc_ipc_unresponsive:
183 if (!pcmk_children[next_child].respawn) {
184
185
186
187
188
189
190 if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
191 subdaemon_check_progress = time(NULL);
192 }
193 }
194 if (!pcmk_children[next_child].active_before_startup) {
195 crm_trace("found %s[%lld] missing - signal-handler "
196 "will take care of it",
197 pcmk_children[next_child].name,
198 (long long) PCMK__SPECIAL_PID_AS_0(
199 pcmk_children[next_child].pid));
200 break;
201 }
202 if (pcmk_children[next_child].respawn) {
203 crm_err("%s[%lld] terminated",
204 pcmk_children[next_child].name,
205 (long long) PCMK__SPECIAL_PID_AS_0(
206 pcmk_children[next_child].pid));
207 } else {
208
209 crm_notice("%s[%lld] terminated",
210 pcmk_children[next_child].name,
211 (long long) PCMK__SPECIAL_PID_AS_0(
212 pcmk_children[next_child].pid));
213 }
214 pcmk_process_exit(&(pcmk_children[next_child]));
215 break;
216 default:
217 crm_exit(CRM_EX_FATAL);
218 break;
219 }
220
221 next_child++;
222 if (next_child >= PCMK__NELEM(pcmk_children)) {
223 next_child = 0;
224 }
225
226 return G_SOURCE_CONTINUE;
227 }
228
229 static gboolean
230 escalate_shutdown(gpointer data)
231 {
232 pcmk_child_t *child = data;
233
234 if (child->pid == PCMK__SPECIAL_PID) {
235 pcmk_process_exit(child);
236
237 } else if (child->pid != 0) {
238
239 crm_err("Child %s not terminating in a timely manner, forcing", child->name);
240 stop_child(child, SIGSEGV);
241 }
242 return FALSE;
243 }
244
245 static void
246 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
247 {
248 pcmk_child_t *child = mainloop_child_userdata(p);
249 const char *name = mainloop_child_name(p);
250
251 if (signo) {
252 do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
253 "%s[%d] terminated with signal %d (%s)%s",
254 name, pid, signo, strsignal(signo),
255 (core? " and dumped core" : ""));
256
257 } else {
258 switch(exitcode) {
259 case CRM_EX_OK:
260 crm_info("%s[%d] exited with status %d (%s)",
261 name, pid, exitcode, crm_exit_str(exitcode));
262 break;
263
264 case CRM_EX_FATAL:
265 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
266 name, pid);
267 child->respawn = false;
268 fatal_error = TRUE;
269 pcmk_shutdown(SIGTERM);
270 break;
271
272 case CRM_EX_PANIC:
273 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
274 child->respawn = false;
275 fatal_error = TRUE;
276 pcmk__panic(__func__);
277 pcmk_shutdown(SIGTERM);
278 break;
279
280 default:
281 crm_err("%s[%d] exited with status %d (%s)",
282 name, pid, exitcode, crm_exit_str(exitcode));
283 break;
284 }
285 }
286
287 pcmk_process_exit(child);
288 }
289
290 static void
291 pcmk_process_exit(pcmk_child_t * child)
292 {
293 child->pid = 0;
294 child->active_before_startup = false;
295 child->check_count = 0;
296
297 child->respawn_count += 1;
298 if (child->respawn_count > MAX_RESPAWN) {
299 crm_err("Child respawn count exceeded by %s", child->name);
300 child->respawn = false;
301 }
302
303 if (shutdown_trigger) {
304
305 mainloop_set_trigger(shutdown_trigger);
306
307 } else if (!child->respawn) {
308
309
310 } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
311 crm_err("Rebooting system because of %s", child->name);
312 pcmk__panic(__func__);
313
314 } else if (child_liveness(child) == pcmk_rc_ok) {
315 crm_warn("One-off suppressing strict respawning of a child process %s,"
316 " appears alright per %s IPC end-point",
317 child->name, child->endpoint);
318
319 } else if (child->needs_cluster && !pcmkd_cluster_connected()) {
320 crm_notice("Not respawning %s subdaemon until cluster returns",
321 child->name);
322 child->needs_retry = true;
323
324 } else {
325 crm_notice("Respawning %s subdaemon after unexpected exit",
326 child->name);
327 start_child(child);
328 }
329 }
330
331 static gboolean
332 pcmk_shutdown_worker(gpointer user_data)
333 {
334 static int phase = PCMK__NELEM(pcmk_children) - 1;
335 static time_t next_log = 0;
336
337 if (phase == PCMK__NELEM(pcmk_children) - 1) {
338 crm_notice("Shutting down Pacemaker");
339 pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
340 }
341
342 for (; phase >= 0; phase--) {
343 pcmk_child_t *child = &(pcmk_children[phase]);
344
345 if (child->pid != 0) {
346 time_t now = time(NULL);
347
348 if (child->respawn) {
349 if (child->pid == PCMK__SPECIAL_PID) {
350 crm_warn("The process behind %s IPC cannot be"
351 " terminated, so either wait the graceful"
352 " period of %ld s for its native termination"
353 " if it vitally depends on some other daemons"
354 " going down in a controlled way already,"
355 " or locate and kill the correct %s process"
356 " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1"
357 " to avoid this altogether next time around",
358 child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
359 child->command);
360 }
361 next_log = now + 30;
362 child->respawn = false;
363 stop_child(child, SIGTERM);
364 if (phase < PCMK_CHILD_CONTROLD) {
365 g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
366 escalate_shutdown, child);
367 }
368
369 } else if (now >= next_log) {
370 next_log = now + 30;
371 crm_notice("Still waiting for %s to terminate "
372 CRM_XS " pid=%lld",
373 child->name, (long long) child->pid);
374 }
375 return TRUE;
376 }
377
378
379 crm_debug("%s confirmed stopped", child->name);
380 child->pid = 0;
381 }
382
383 crm_notice("Shutdown complete");
384 pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
385 if (!fatal_error && running_with_sbd &&
386 pcmk__get_sbd_sync_resource_startup() &&
387 !shutdown_complete_state_reported_client_closed) {
388 crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
389 return TRUE;
390 }
391
392
393 {
394 const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
395 if(delay) {
396 sync();
397 pcmk__sleep_ms(crm_get_msec(delay));
398 }
399 }
400
401 g_main_loop_quit(mainloop);
402
403 if (fatal_error) {
404 crm_notice("Shutting down and staying down after fatal error");
405 #ifdef SUPPORT_COROSYNC
406 pcmkd_shutdown_corosync();
407 #endif
408 crm_exit(CRM_EX_FATAL);
409 }
410
411 return TRUE;
412 }
413
414
415
416
417
418
419
420 static int
421 start_child(pcmk_child_t * child)
422 {
423 uid_t uid = 0;
424 gid_t gid = 0;
425 gboolean use_valgrind = FALSE;
426 gboolean use_callgrind = FALSE;
427 const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
428 const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
429
430 child->active_before_startup = false;
431 child->check_count = 0;
432
433 if (child->command == NULL) {
434 crm_info("Nothing to do for child \"%s\"", child->name);
435 return pcmk_rc_ok;
436 }
437
438 if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
439 use_callgrind = TRUE;
440 use_valgrind = TRUE;
441
442 } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
443 use_callgrind = TRUE;
444 use_valgrind = TRUE;
445
446 } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
447 use_valgrind = TRUE;
448
449 } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
450 use_valgrind = TRUE;
451 }
452
453 if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
454 crm_warn("Cannot enable valgrind for %s:"
455 " The location of the valgrind binary is unknown", child->name);
456 use_valgrind = FALSE;
457 }
458
459 if (child->uid) {
460 if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
461 crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
462 return EACCES;
463 }
464 crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
465 }
466
467 child->pid = fork();
468 CRM_ASSERT(child->pid != -1);
469
470 if (child->pid > 0) {
471
472 mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
473
474 crm_info("Forked child %lld for process %s%s",
475 (long long) child->pid, child->name,
476 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
477 return pcmk_rc_ok;
478
479 } else {
480
481 (void)setsid();
482
483
484 opts_vgrind[0] = strdup(VALGRIND_BIN);
485 if (use_callgrind) {
486 opts_vgrind[1] = strdup("--tool=callgrind");
487 opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
488 opts_vgrind[3] = strdup(child->command);
489 opts_vgrind[4] = NULL;
490 } else {
491 opts_vgrind[1] = strdup(child->command);
492 opts_vgrind[2] = NULL;
493 opts_vgrind[3] = NULL;
494 opts_vgrind[4] = NULL;
495 }
496 opts_default[0] = strdup(child->command);
497
498 if(gid) {
499
500 if (!need_root_group && (setgid(gid) < 0)) {
501 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
502 }
503
504
505
506
507 if (initgroups(child->uid, gid) < 0) {
508 crm_err("Cannot initialize groups for %s: %s (%d)",
509 child->uid, pcmk_rc_str(errno), errno);
510 }
511 }
512
513 if (uid && setuid(uid) < 0) {
514 crm_warn("Could not set user to %s (id %d): %s",
515 child->uid, uid, strerror(errno));
516 }
517
518 pcmk__close_fds_in_child(true);
519
520 pcmk__open_devnull(O_RDONLY);
521 pcmk__open_devnull(O_WRONLY);
522 pcmk__open_devnull(O_WRONLY);
523
524 if (use_valgrind) {
525 (void)execvp(VALGRIND_BIN, opts_vgrind);
526 } else {
527 (void)execvp(child->command, opts_default);
528 }
529 crm_crit("Could not execute %s: %s", child->command, strerror(errno));
530 crm_exit(CRM_EX_FATAL);
531 }
532 return pcmk_rc_ok;
533 }
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553 static int
554 child_liveness(pcmk_child_t *child)
555 {
556 uid_t cl_uid = 0;
557 gid_t cl_gid = 0;
558 const uid_t root_uid = 0;
559 const gid_t root_gid = 0;
560 const uid_t *ref_uid;
561 const gid_t *ref_gid;
562 int rc = pcmk_rc_ipc_unresponsive;
563 pid_t ipc_pid = 0;
564
565 if (child->endpoint == NULL
566 && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
567 crm_err("Cannot track child %s for missing both API end-point and PID",
568 child->name);
569 rc = EINVAL;
570
571 } else if (child->endpoint != NULL) {
572 int legacy_rc = pcmk_ok;
573
574 if (child->uid == NULL) {
575 ref_uid = &root_uid;
576 ref_gid = &root_gid;
577 } else {
578 ref_uid = &cl_uid;
579 ref_gid = &cl_gid;
580 legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
581 }
582
583 if (legacy_rc < 0) {
584 rc = pcmk_legacy2rc(legacy_rc);
585 crm_err("Could not find user and group IDs for user %s: %s "
586 CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
587 } else {
588 rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
589 *ref_uid, *ref_gid,
590 &ipc_pid);
591 if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
592 if (child->pid <= 0) {
593
594
595
596
597
598 child->pid = ipc_pid;
599 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
600
601
602
603 rc = pcmk_rc_ipc_unresponsive;
604 }
605 }
606 }
607 }
608
609 if (rc == pcmk_rc_ipc_unresponsive) {
610
611
612
613
614
615
616 int ret = pcmk__pid_active(child->pid, child->name);
617
618 if (ipc_pid && ((ret != pcmk_rc_ok)
619 || ipc_pid == PCMK__SPECIAL_PID
620 || (pcmk__pid_active(ipc_pid,
621 child->name) == pcmk_rc_ok))) {
622
623
624
625
626
627 if (ret == pcmk_rc_ok) {
628
629
630
631
632
633
634
635
636 stop_child(child, SIGKILL);
637 }
638 rc = pcmk_rc_ok;
639 child->pid = ipc_pid;
640 } else if (ret == pcmk_rc_ok) {
641
642 rc = pcmk_rc_ipc_pid_only;
643 } else if ((child->pid == 0) && (ret == EINVAL)) {
644
645 rc = pcmk_rc_ipc_unresponsive;
646 } else {
647 switch (ret) {
648 case EACCES:
649 rc = pcmk_rc_ipc_unauthorized;
650 break;
651 case ESRCH:
652 rc = pcmk_rc_ipc_unresponsive;
653 break;
654 default:
655 rc = ret;
656 break;
657 }
658 }
659 }
660 return rc;
661 }
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690 #define WAIT_TRIES 4
691 int
692 find_and_track_existing_processes(void)
693 {
694 bool wait_in_progress;
695 int rc;
696 size_t i, rounds;
697
698 for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
699 wait_in_progress = false;
700 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
701
702 if ((pcmk_children[i].endpoint == NULL)
703 || (pcmk_children[i].respawn_count < 0)) {
704 continue;
705 }
706
707 rc = child_liveness(&pcmk_children[i]);
708 if (rc == pcmk_rc_ipc_unresponsive) {
709
710
711
712
713 continue;
714 }
715
716
717 pcmk_children[i].respawn_count = rounds;
718 switch (rc) {
719 case pcmk_rc_ok:
720 if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
721 if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
722 crm_crit("Cannot reliably track pre-existing"
723 " authentic process behind %s IPC on this"
724 " platform and PCMK_" PCMK__ENV_FAIL_FAST
725 " requested",
726 pcmk_children[i].endpoint);
727 return EOPNOTSUPP;
728 } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
729 crm_notice("Assuming pre-existing authentic, though"
730 " on this platform untrackable, process"
731 " behind %s IPC is stable (was in %d"
732 " previous samples) so rather than"
733 " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
734 " not requested), we just switch to a"
735 " less optimal IPC liveness monitoring"
736 " (not very suitable for heavy load)",
737 pcmk_children[i].name, WAIT_TRIES - 1);
738 crm_warn("The process behind %s IPC cannot be"
739 " terminated, so the overall shutdown"
740 " will get delayed implicitly (%ld s),"
741 " which serves as a graceful period for"
742 " its native termination if it vitally"
743 " depends on some other daemons going"
744 " down in a controlled way already",
745 pcmk_children[i].name,
746 (long) SHUTDOWN_ESCALATION_PERIOD);
747 } else {
748 wait_in_progress = true;
749 crm_warn("Cannot reliably track pre-existing"
750 " authentic process behind %s IPC on this"
751 " platform, can still disappear in %d"
752 " attempt(s)", pcmk_children[i].endpoint,
753 WAIT_TRIES - pcmk_children[i].respawn_count);
754 continue;
755 }
756 }
757 crm_notice("Tracking existing %s process (pid=%lld)",
758 pcmk_children[i].name,
759 (long long) PCMK__SPECIAL_PID_AS_0(
760 pcmk_children[i].pid));
761 pcmk_children[i].respawn_count = -1;
762 pcmk_children[i].active_before_startup = true;
763 break;
764 case pcmk_rc_ipc_pid_only:
765 if (pcmk_children[i].respawn_count == WAIT_TRIES) {
766 crm_crit("%s IPC end-point for existing authentic"
767 " process %lld did not (re)appear",
768 pcmk_children[i].endpoint,
769 (long long) PCMK__SPECIAL_PID_AS_0(
770 pcmk_children[i].pid));
771 return rc;
772 }
773 wait_in_progress = true;
774 crm_warn("Cannot find %s IPC end-point for existing"
775 " authentic process %lld, can still (re)appear"
776 " in %d attempts (?)",
777 pcmk_children[i].endpoint,
778 (long long) PCMK__SPECIAL_PID_AS_0(
779 pcmk_children[i].pid),
780 WAIT_TRIES - pcmk_children[i].respawn_count);
781 continue;
782 default:
783 crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
784 pcmk_children[i].name, pcmk_rc_str(rc), rc);
785 return rc;
786 }
787 }
788 if (!wait_in_progress) {
789 break;
790 }
791 pcmk__sleep_ms(250);
792 }
793 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
794 pcmk_children[i].respawn_count = 0;
795 }
796
797 g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
798 NULL);
799 return pcmk_rc_ok;
800 }
801
802 gboolean
803 init_children_processes(void *user_data)
804 {
805 if (is_corosync_cluster()) {
806
807
808
809
810 need_root_group = false;
811 }
812
813
814 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
815 if (pcmk_children[i].pid != 0) {
816
817 continue;
818 }
819
820 start_child(&(pcmk_children[i]));
821 }
822
823
824
825
826
827
828 pcmk__set_env_option(PCMK__ENV_RESPAWNED, "true", false);
829 pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
830 return TRUE;
831 }
832
833 void
834 pcmk_shutdown(int nsig)
835 {
836 if (shutdown_trigger == NULL) {
837 shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
838 }
839 mainloop_set_trigger(shutdown_trigger);
840 }
841
842 void
843 restart_cluster_subdaemons(void)
844 {
845 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
846 if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) {
847 continue;
848 }
849
850 crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
851 if (start_child(&pcmk_children[i])) {
852 pcmk_children[i].needs_retry = false;
853 }
854 }
855 }
856
857 static gboolean
858 stop_child(pcmk_child_t * child, int signal)
859 {
860 if (signal == 0) {
861 signal = SIGTERM;
862 }
863
864
865
866
867
868 if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
869 crm_debug("Nothing to do for child \"%s\" (process %lld)",
870 child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
871 return TRUE;
872 }
873
874 if (child->pid <= 0) {
875 crm_trace("Client %s not running", child->name);
876 return TRUE;
877 }
878
879 errno = 0;
880 if (kill(child->pid, signal) == 0) {
881 crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
882 child->name, signal, (long long) child->pid);
883
884 } else {
885 crm_err("Could not stop %s (process %lld) with signal %d: %s",
886 child->name, (long long) child->pid, signal, strerror(errno));
887 }
888
889 return TRUE;
890 }
891