This source file includes the following definitions.
- pcmkd_cluster_connected
- check_next_subdaemon
- escalate_shutdown
- pcmk_child_exit
- pcmk_process_exit
- pcmk_shutdown_worker
- start_child
- child_liveness
- find_and_track_existing_processes
- init_children_processes
- pcmk_shutdown
- restart_cluster_subdaemons
- stop_child
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include "pacemakerd.h"
12
13 #include <errno.h>
14 #include <grp.h>
15 #include <signal.h>
16 #include <stdbool.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <time.h>
21 #include <unistd.h>
22
23 #include <crm/cluster.h>
24 #include <crm/msg_xml.h>
25
/*
 * Bookkeeping record for one subdaemon managed by this file.
 *
 * The member order must not change: the pcmk_children[] table relies on it
 * for its initializers.
 */
typedef struct pcmk_child_s {
    pid_t pid;                  /* PID being tracked; 0 when nothing is tracked */
    int respawn_count;          /* exits seen so far, compared with MAX_RESPAWN
                                 * (also reused as a scratch counter while
                                 * looking for pre-existing processes) */
    bool respawn;               /* whether the subdaemon may be started again
                                 * after it exits */
    const char *name;           /* subdaemon name used in log messages */
    const char *uid;            /* user name to run as; NULL leaves the user
                                 * unchanged */
    const char *command;        /* path of the executable to run */
    const char *endpoint;       /* IPC end-point probed for liveness */
    bool needs_cluster;         /* do not respawn while the cluster layer is
                                 * disconnected */
    int check_count;            /* consecutive liveness checks that found the
                                 * PID but no IPC response */

    /* The members below are left out of the table initializers and therefore
     * start out zeroed.
     */
    bool needs_retry;           /* respawn was postponed until the cluster
                                 * connection returns */
    bool active_before_startup; /* process already existed when we started and
                                 * was adopted rather than forked */
} pcmk_child_t;
41
/* Seconds between two runs of the periodic subdaemon liveness check */
#define PCMK_PROCESS_CHECK_INTERVAL 1

/* Consecutive "PID alive but IPC silent" results tolerated before the
 * subdaemon is killed
 */
#define PCMK_PROCESS_CHECK_RETRIES  5

/* Milliseconds (it is handed to g_timeout_add()) granted to a subdaemon to
 * stop before the shutdown is escalated
 */
#define SHUTDOWN_ESCALATION_PERIOD  180000

/* Index of pacemaker-controld within pcmk_children[]; keep the two in sync */
#define PCMK_CHILD_CONTROLD         5
48
49 static pcmk_child_t pcmk_children[] = {
50 {
51 0, 0, true, "pacemaker-based", CRM_DAEMON_USER,
52 CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
53 true
54 },
55 {
56 0, 0, true, "pacemaker-fenced", NULL,
57 CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
58 true
59 },
60 {
61 0, 0, true, "pacemaker-execd", NULL,
62 CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
63 false
64 },
65 {
66 0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER,
67 CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD,
68 true
69 },
70 {
71 0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER,
72 CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
73 false
74 },
75 {
76 0, 0, true, "pacemaker-controld", CRM_DAEMON_USER,
77 CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
78 true
79 },
80 };
81
/* argv used by start_child() for a plain exec: { command, NULL } */
static char *opts_default[] = { NULL, NULL };

/* argv used by start_child() when running under valgrind: the valgrind
 * binary, up to two callgrind options, the command, and a NULL terminator
 */
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
84
85 crm_trigger_t *shutdown_trigger = NULL;
86 crm_trigger_t *startup_trigger = NULL;
87 time_t subdaemon_check_progress = 0;
88
89
90
91
92
93
94
95
96
97
98
99 unsigned int shutdown_complete_state_reported_to = 0;
100 gboolean shutdown_complete_state_reported_client_closed = FALSE;
101
102
103 const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
104 gboolean running_with_sbd = FALSE;
105
106 GMainLoop *mainloop = NULL;
107
108 static gboolean fatal_error = FALSE;
109
110 static int child_liveness(pcmk_child_t *child);
111 static gboolean escalate_shutdown(gpointer data);
112 static int start_child(pcmk_child_t * child);
113 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
114 static void pcmk_process_exit(pcmk_child_t * child);
115 static gboolean pcmk_shutdown_worker(gpointer user_data);
116 static gboolean stop_child(pcmk_child_t * child, int signal);
117
/*
 * Report whether the cluster layer is currently reachable.
 *
 * Without Corosync support compiled in there is nothing to ask, so the
 * answer is always true.
 */
static bool
pcmkd_cluster_connected(void)
{
    bool connected = true;

#if SUPPORT_COROSYNC
    connected = pcmkd_corosync_connected();
#endif

    return connected;
}
127
128 static gboolean
129 check_next_subdaemon(gpointer user_data)
130 {
131 static int next_child = 0;
132 int rc = child_liveness(&pcmk_children[next_child]);
133
134 crm_trace("Checked %s[%lld]: %s (%d)",
135 pcmk_children[next_child].name,
136 (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid),
137 pcmk_rc_str(rc), rc);
138
139 switch (rc) {
140 case pcmk_rc_ok:
141 pcmk_children[next_child].check_count = 0;
142 subdaemon_check_progress = time(NULL);
143 break;
144 case pcmk_rc_ipc_pid_only:
145 pcmk_children[next_child].check_count++;
146 if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) {
147 crm_err("%s[%lld] is unresponsive to ipc after %d tries but "
148 "we found the pid so have it killed that we can restart",
149 pcmk_children[next_child].name,
150 (long long) PCMK__SPECIAL_PID_AS_0(
151 pcmk_children[next_child].pid),
152 pcmk_children[next_child].check_count);
153 stop_child(&pcmk_children[next_child], SIGKILL);
154 if (pcmk_children[next_child].respawn) {
155
156
157
158 pcmk_children[next_child].check_count = 0;
159 }
160 } else {
161 crm_notice("%s[%lld] is unresponsive to ipc after %d tries",
162 pcmk_children[next_child].name,
163 (long long) PCMK__SPECIAL_PID_AS_0(
164 pcmk_children[next_child].pid),
165 pcmk_children[next_child].check_count);
166 if (pcmk_children[next_child].respawn) {
167
168
169
170
171
172 subdaemon_check_progress = time(NULL);
173 }
174 }
175
176
177
178 break;
179 case pcmk_rc_ipc_unresponsive:
180 if (!pcmk_children[next_child].respawn) {
181
182
183
184
185
186
187 if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
188 subdaemon_check_progress = time(NULL);
189 }
190 }
191 if (!pcmk_children[next_child].active_before_startup) {
192 crm_trace("found %s[%lld] missing - signal-handler "
193 "will take care of it",
194 pcmk_children[next_child].name,
195 (long long) PCMK__SPECIAL_PID_AS_0(
196 pcmk_children[next_child].pid));
197 break;
198 }
199 if (pcmk_children[next_child].respawn) {
200 crm_err("%s[%lld] terminated",
201 pcmk_children[next_child].name,
202 (long long) PCMK__SPECIAL_PID_AS_0(
203 pcmk_children[next_child].pid));
204 } else {
205
206 crm_notice("%s[%lld] terminated",
207 pcmk_children[next_child].name,
208 (long long) PCMK__SPECIAL_PID_AS_0(
209 pcmk_children[next_child].pid));
210 }
211 pcmk_process_exit(&(pcmk_children[next_child]));
212 break;
213 default:
214 crm_exit(CRM_EX_FATAL);
215 break;
216 }
217
218 next_child++;
219 if (next_child >= PCMK__NELEM(pcmk_children)) {
220 next_child = 0;
221 }
222
223 return G_SOURCE_CONTINUE;
224 }
225
226 static gboolean
227 escalate_shutdown(gpointer data)
228 {
229 pcmk_child_t *child = data;
230
231 if (child->pid == PCMK__SPECIAL_PID) {
232 pcmk_process_exit(child);
233
234 } else if (child->pid != 0) {
235
236 crm_err("Child %s not terminating in a timely manner, forcing", child->name);
237 stop_child(child, SIGSEGV);
238 }
239 return FALSE;
240 }
241
242 static void
243 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
244 {
245 pcmk_child_t *child = mainloop_child_userdata(p);
246 const char *name = mainloop_child_name(p);
247
248 if (signo) {
249 do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
250 "%s[%d] terminated with signal %d (%s)%s",
251 name, pid, signo, strsignal(signo),
252 (core? " and dumped core" : ""));
253
254 } else {
255 switch(exitcode) {
256 case CRM_EX_OK:
257 crm_info("%s[%d] exited with status %d (%s)",
258 name, pid, exitcode, crm_exit_str(exitcode));
259 break;
260
261 case CRM_EX_FATAL:
262 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
263 name, pid);
264 child->respawn = false;
265 fatal_error = TRUE;
266 pcmk_shutdown(SIGTERM);
267 break;
268
269 case CRM_EX_PANIC:
270 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
271 child->respawn = false;
272 fatal_error = TRUE;
273 pcmk__panic(__func__);
274 pcmk_shutdown(SIGTERM);
275 break;
276
277 default:
278 crm_err("%s[%d] exited with status %d (%s)",
279 name, pid, exitcode, crm_exit_str(exitcode));
280 break;
281 }
282 }
283
284 pcmk_process_exit(child);
285 }
286
287 static void
288 pcmk_process_exit(pcmk_child_t * child)
289 {
290 child->pid = 0;
291 child->active_before_startup = false;
292 child->check_count = 0;
293
294 child->respawn_count += 1;
295 if (child->respawn_count > MAX_RESPAWN) {
296 crm_err("Child respawn count exceeded by %s", child->name);
297 child->respawn = false;
298 }
299
300 if (shutdown_trigger) {
301
302 mainloop_set_trigger(shutdown_trigger);
303
304 } else if (!child->respawn) {
305
306
307 } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
308 crm_err("Rebooting system because of %s", child->name);
309 pcmk__panic(__func__);
310
311 } else if (child_liveness(child) == pcmk_rc_ok) {
312 crm_warn("One-off suppressing strict respawning of a child process %s,"
313 " appears alright per %s IPC end-point",
314 child->name, child->endpoint);
315
316 } else if (child->needs_cluster && !pcmkd_cluster_connected()) {
317 crm_notice("Not respawning %s subdaemon until cluster returns",
318 child->name);
319 child->needs_retry = true;
320
321 } else {
322 crm_notice("Respawning %s subdaemon after unexpected exit",
323 child->name);
324 start_child(child);
325 }
326 }
327
328 static gboolean
329 pcmk_shutdown_worker(gpointer user_data)
330 {
331 static int phase = PCMK__NELEM(pcmk_children) - 1;
332 static time_t next_log = 0;
333
334 if (phase == PCMK__NELEM(pcmk_children) - 1) {
335 crm_notice("Shutting down Pacemaker");
336 pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
337 }
338
339 for (; phase >= 0; phase--) {
340 pcmk_child_t *child = &(pcmk_children[phase]);
341
342 if (child->pid != 0) {
343 time_t now = time(NULL);
344
345 if (child->respawn) {
346 if (child->pid == PCMK__SPECIAL_PID) {
347 crm_warn("The process behind %s IPC cannot be"
348 " terminated, so either wait the graceful"
349 " period of %ld s for its native termination"
350 " if it vitally depends on some other daemons"
351 " going down in a controlled way already,"
352 " or locate and kill the correct %s process"
353 " on your own; set PCMK_fail_fast=1 to avoid"
354 " this altogether next time around",
355 child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
356 child->command);
357 }
358 next_log = now + 30;
359 child->respawn = false;
360 stop_child(child, SIGTERM);
361 if (phase < PCMK_CHILD_CONTROLD) {
362 g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
363 escalate_shutdown, child);
364 }
365
366 } else if (now >= next_log) {
367 next_log = now + 30;
368 crm_notice("Still waiting for %s to terminate "
369 CRM_XS " pid=%lld",
370 child->name, (long long) child->pid);
371 }
372 return TRUE;
373 }
374
375
376 crm_debug("%s confirmed stopped", child->name);
377 child->pid = 0;
378 }
379
380 crm_notice("Shutdown complete");
381 pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
382 if (!fatal_error && running_with_sbd &&
383 pcmk__get_sbd_sync_resource_startup() &&
384 !shutdown_complete_state_reported_client_closed) {
385 crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
386 return TRUE;
387 }
388
389 {
390 const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
391 if(delay) {
392 sync();
393 pcmk__sleep_ms(crm_get_msec(delay));
394 }
395 }
396
397 g_main_loop_quit(mainloop);
398
399 if (fatal_error) {
400 crm_notice("Shutting down and staying down after fatal error");
401 #ifdef SUPPORT_COROSYNC
402 pcmkd_shutdown_corosync();
403 #endif
404 crm_exit(CRM_EX_FATAL);
405 }
406
407 return TRUE;
408 }
409
410
411
412
413
414
415
416 static int
417 start_child(pcmk_child_t * child)
418 {
419 uid_t uid = 0;
420 gid_t gid = 0;
421 gboolean use_valgrind = FALSE;
422 gboolean use_callgrind = FALSE;
423 const char *env_valgrind = getenv("PCMK_valgrind_enabled");
424 const char *env_callgrind = getenv("PCMK_callgrind_enabled");
425
426 child->active_before_startup = false;
427 child->check_count = 0;
428
429 if (child->command == NULL) {
430 crm_info("Nothing to do for child \"%s\"", child->name);
431 return pcmk_rc_ok;
432 }
433
434 if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
435 use_callgrind = TRUE;
436 use_valgrind = TRUE;
437
438 } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
439 use_callgrind = TRUE;
440 use_valgrind = TRUE;
441
442 } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
443 use_valgrind = TRUE;
444
445 } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
446 use_valgrind = TRUE;
447 }
448
449 if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
450 crm_warn("Cannot enable valgrind for %s:"
451 " The location of the valgrind binary is unknown", child->name);
452 use_valgrind = FALSE;
453 }
454
455 if (child->uid) {
456 if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
457 crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
458 return EACCES;
459 }
460 crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
461 }
462
463 child->pid = fork();
464 CRM_ASSERT(child->pid != -1);
465
466 if (child->pid > 0) {
467
468 mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
469
470 crm_info("Forked child %lld for process %s%s",
471 (long long) child->pid, child->name,
472 use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
473 return pcmk_rc_ok;
474
475 } else {
476
477 (void)setsid();
478
479
480 opts_vgrind[0] = strdup(VALGRIND_BIN);
481 if (use_callgrind) {
482 opts_vgrind[1] = strdup("--tool=callgrind");
483 opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
484 opts_vgrind[3] = strdup(child->command);
485 opts_vgrind[4] = NULL;
486 } else {
487 opts_vgrind[1] = strdup(child->command);
488 opts_vgrind[2] = NULL;
489 opts_vgrind[3] = NULL;
490 opts_vgrind[4] = NULL;
491 }
492 opts_default[0] = strdup(child->command);
493
494 if(gid) {
495
496 bool need_root_group = TRUE;
497
498 if (is_corosync_cluster()) {
499
500
501
502
503 need_root_group = FALSE;
504 }
505
506
507 if (!need_root_group && (setgid(gid) < 0)) {
508 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
509 }
510
511
512
513
514 if (initgroups(child->uid, gid) < 0) {
515 crm_err("Cannot initialize groups for %s: %s (%d)",
516 child->uid, pcmk_rc_str(errno), errno);
517 }
518 }
519
520 if (uid && setuid(uid) < 0) {
521 crm_warn("Could not set user to %s (id %d): %s",
522 child->uid, uid, strerror(errno));
523 }
524
525 pcmk__close_fds_in_child(true);
526
527 pcmk__open_devnull(O_RDONLY);
528 pcmk__open_devnull(O_WRONLY);
529 pcmk__open_devnull(O_WRONLY);
530
531 if (use_valgrind) {
532 (void)execvp(VALGRIND_BIN, opts_vgrind);
533 } else {
534 (void)execvp(child->command, opts_default);
535 }
536 crm_crit("Could not execute %s: %s", child->command, strerror(errno));
537 crm_exit(CRM_EX_FATAL);
538 }
539 return pcmk_rc_ok;
540 }
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560 static int
561 child_liveness(pcmk_child_t *child)
562 {
563 uid_t cl_uid = 0;
564 gid_t cl_gid = 0;
565 const uid_t root_uid = 0;
566 const gid_t root_gid = 0;
567 const uid_t *ref_uid;
568 const gid_t *ref_gid;
569 int rc = pcmk_rc_ipc_unresponsive;
570 pid_t ipc_pid = 0;
571
572 if (child->endpoint == NULL
573 && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
574 crm_err("Cannot track child %s for missing both API end-point and PID",
575 child->name);
576 rc = EINVAL;
577
578 } else if (child->endpoint != NULL) {
579 int legacy_rc = pcmk_ok;
580
581 if (child->uid == NULL) {
582 ref_uid = &root_uid;
583 ref_gid = &root_gid;
584 } else {
585 ref_uid = &cl_uid;
586 ref_gid = &cl_gid;
587 legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
588 }
589
590 if (legacy_rc < 0) {
591 rc = pcmk_legacy2rc(legacy_rc);
592 crm_err("Could not find user and group IDs for user %s: %s "
593 CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
594 } else {
595 rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
596 *ref_uid, *ref_gid,
597 &ipc_pid);
598 if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
599 if (child->pid <= 0) {
600
601
602
603
604
605 child->pid = ipc_pid;
606 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
607
608
609
610 rc = pcmk_rc_ipc_unresponsive;
611 }
612 }
613 }
614 }
615
616 if (rc == pcmk_rc_ipc_unresponsive) {
617
618
619
620
621
622
623 int ret = pcmk__pid_active(child->pid, child->name);
624
625 if (ipc_pid && ((ret != pcmk_rc_ok)
626 || ipc_pid == PCMK__SPECIAL_PID
627 || (pcmk__pid_active(ipc_pid,
628 child->name) == pcmk_rc_ok))) {
629
630
631
632
633
634 if (ret == pcmk_rc_ok) {
635
636
637
638
639
640
641
642
643 stop_child(child, SIGKILL);
644 }
645 rc = pcmk_rc_ok;
646 child->pid = ipc_pid;
647 } else if (ret == pcmk_rc_ok) {
648
649 rc = pcmk_rc_ipc_pid_only;
650 } else if ((child->pid == 0) && (ret == EINVAL)) {
651
652 rc = pcmk_rc_ipc_unresponsive;
653 } else {
654 switch (ret) {
655 case EACCES:
656 rc = pcmk_rc_ipc_unauthorized;
657 break;
658 case ESRCH:
659 rc = pcmk_rc_ipc_unresponsive;
660 break;
661 default:
662 rc = ret;
663 break;
664 }
665 }
666 }
667 return rc;
668 }
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697 #define WAIT_TRIES 4
698 int
699 find_and_track_existing_processes(void)
700 {
701 bool wait_in_progress;
702 int rc;
703 size_t i, rounds;
704
705 for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
706 wait_in_progress = false;
707 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
708
709 if ((pcmk_children[i].endpoint == NULL)
710 || (pcmk_children[i].respawn_count < 0)) {
711 continue;
712 }
713
714 rc = child_liveness(&pcmk_children[i]);
715 if (rc == pcmk_rc_ipc_unresponsive) {
716
717
718
719
720 continue;
721 }
722
723 pcmk_children[i].respawn_count = rounds;
724 switch (rc) {
725 case pcmk_rc_ok:
726 if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
727 if (crm_is_true(getenv("PCMK_fail_fast"))) {
728 crm_crit("Cannot reliably track pre-existing"
729 " authentic process behind %s IPC on this"
730 " platform and PCMK_fail_fast requested",
731 pcmk_children[i].endpoint);
732 return EOPNOTSUPP;
733 } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
734 crm_notice("Assuming pre-existing authentic, though"
735 " on this platform untrackable, process"
736 " behind %s IPC is stable (was in %d"
737 " previous samples) so rather than"
738 " bailing out (PCMK_fail_fast not"
739 " requested), we just switch to a less"
740 " optimal IPC liveness monitoring"
741 " (not very suitable for heavy load)",
742 pcmk_children[i].name, WAIT_TRIES - 1);
743 crm_warn("The process behind %s IPC cannot be"
744 " terminated, so the overall shutdown"
745 " will get delayed implicitly (%ld s),"
746 " which serves as a graceful period for"
747 " its native termination if it vitally"
748 " depends on some other daemons going"
749 " down in a controlled way already",
750 pcmk_children[i].name,
751 (long) SHUTDOWN_ESCALATION_PERIOD);
752 } else {
753 wait_in_progress = true;
754 crm_warn("Cannot reliably track pre-existing"
755 " authentic process behind %s IPC on this"
756 " platform, can still disappear in %d"
757 " attempt(s)", pcmk_children[i].endpoint,
758 WAIT_TRIES - pcmk_children[i].respawn_count);
759 continue;
760 }
761 }
762 crm_notice("Tracking existing %s process (pid=%lld)",
763 pcmk_children[i].name,
764 (long long) PCMK__SPECIAL_PID_AS_0(
765 pcmk_children[i].pid));
766 pcmk_children[i].respawn_count = -1;
767 pcmk_children[i].active_before_startup = true;
768 break;
769 case pcmk_rc_ipc_pid_only:
770 if (pcmk_children[i].respawn_count == WAIT_TRIES) {
771 crm_crit("%s IPC end-point for existing authentic"
772 " process %lld did not (re)appear",
773 pcmk_children[i].endpoint,
774 (long long) PCMK__SPECIAL_PID_AS_0(
775 pcmk_children[i].pid));
776 return rc;
777 }
778 wait_in_progress = true;
779 crm_warn("Cannot find %s IPC end-point for existing"
780 " authentic process %lld, can still (re)appear"
781 " in %d attempts (?)",
782 pcmk_children[i].endpoint,
783 (long long) PCMK__SPECIAL_PID_AS_0(
784 pcmk_children[i].pid),
785 WAIT_TRIES - pcmk_children[i].respawn_count);
786 continue;
787 default:
788 crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
789 pcmk_children[i].name, pcmk_rc_str(rc), rc);
790 return rc;
791 }
792 }
793 if (!wait_in_progress) {
794 break;
795 }
796 pcmk__sleep_ms(250);
797 }
798 for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
799 pcmk_children[i].respawn_count = 0;
800 }
801
802 g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
803 NULL);
804 return pcmk_rc_ok;
805 }
806
807 gboolean
808 init_children_processes(void *user_data)
809 {
810
811 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
812 if (pcmk_children[i].pid != 0) {
813
814 continue;
815 }
816
817 start_child(&(pcmk_children[i]));
818 }
819
820
821
822
823
824
825 setenv("PCMK_respawned", "true", 1);
826 pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
827 return TRUE;
828 }
829
830 void
831 pcmk_shutdown(int nsig)
832 {
833 if (shutdown_trigger == NULL) {
834 shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
835 }
836 mainloop_set_trigger(shutdown_trigger);
837 }
838
839 void
840 restart_cluster_subdaemons(void)
841 {
842 for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
843 if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) {
844 continue;
845 }
846
847 crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
848 if (start_child(&pcmk_children[i])) {
849 pcmk_children[i].needs_retry = false;
850 }
851 }
852 }
853
854 static gboolean
855 stop_child(pcmk_child_t * child, int signal)
856 {
857 if (signal == 0) {
858 signal = SIGTERM;
859 }
860
861
862
863
864
865 if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
866 crm_debug("Nothing to do for child \"%s\" (process %lld)",
867 child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
868 return TRUE;
869 }
870
871 if (child->pid <= 0) {
872 crm_trace("Client %s not running", child->name);
873 return TRUE;
874 }
875
876 errno = 0;
877 if (kill(child->pid, signal) == 0) {
878 crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
879 child->name, signal, (long long) child->pid);
880
881 } else {
882 crm_err("Could not stop %s (process %lld) with signal %d: %s",
883 child->name, (long long) child->pid, signal, strerror(errno));
884 }
885
886 return TRUE;
887 }
888