root/daemons/pacemakerd/pcmkd_subdaemons.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. pcmkd_cluster_connected
  2. check_next_subdaemon
  3. escalate_shutdown
  4. pcmk_child_exit
  5. pcmk_process_exit
  6. pcmk_shutdown_worker
  7. start_child
  8. child_liveness
  9. find_and_track_existing_processes
  10. init_children_processes
  11. pcmk_shutdown
  12. restart_cluster_subdaemons
  13. stop_child

   1 /*
   2  * Copyright 2010-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #if SUPPORT_COROSYNC
  14 #include "pcmkd_corosync.h"
  15 #endif
  16 
  17 #include <errno.h>
  18 #include <grp.h>
  19 #include <signal.h>
  20 #include <stdbool.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <sys/types.h>
  24 #include <time.h>
  25 #include <unistd.h>
  26 
  27 #include <crm/cluster.h>
  28 #include <crm/common/xml.h>
  29 
  30 enum child_daemon_flags {
  31     child_none                  = 0,
  32     child_respawn               = 1 << 0,
  33     child_needs_cluster         = 1 << 1,
  34     child_needs_retry           = 1 << 2,
  35     child_active_before_startup = 1 << 3,
  36 };
  37 
  38 typedef struct pcmk_child_s {
  39     pid_t pid;
  40     int respawn_count;
  41     const char *name;
  42     const char *uid;
  43     const char *command;
  44     const char *endpoint;  /* IPC server name */
  45     int check_count;
  46     uint32_t flags;
  47 } pcmk_child_t;
  48 
  49 #define PCMK_PROCESS_CHECK_INTERVAL 1
  50 #define PCMK_PROCESS_CHECK_RETRIES  5
  51 #define SHUTDOWN_ESCALATION_PERIOD  180000  /* 3m */
  52 
  53 /* Index into the array below */
  54 #define PCMK_CHILD_CONTROLD  5
  55 
  56 static pcmk_child_t pcmk_children[] = {
  57     {
  58         0, 0, "pacemaker-based", CRM_DAEMON_USER,
  59         CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
  60         0, child_respawn | child_needs_cluster
  61     },
  62     {
  63         0, 0, "pacemaker-fenced", NULL,
  64         CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
  65         0, child_respawn | child_needs_cluster
  66     },
  67     {
  68         0, 0, "pacemaker-execd", NULL,
  69         CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
  70         0, child_respawn
  71     },
  72     {
  73         0, 0, "pacemaker-attrd", CRM_DAEMON_USER,
  74         CRM_DAEMON_DIR "/pacemaker-attrd", PCMK__VALUE_ATTRD,
  75         0, child_respawn | child_needs_cluster
  76     },
  77     {
  78         0, 0, "pacemaker-schedulerd", CRM_DAEMON_USER,
  79         CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
  80         0, child_respawn
  81     },
  82     {
  83         0, 0, "pacemaker-controld", CRM_DAEMON_USER,
  84         CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
  85         0, child_respawn | child_needs_cluster
  86     },
  87 };
  88 
  89 static char *opts_default[] = { NULL, NULL };
  90 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
  91 
  92 crm_trigger_t *shutdown_trigger = NULL;
  93 crm_trigger_t *startup_trigger = NULL;
  94 time_t subdaemon_check_progress = 0;
  95 
  96 // Whether we need root group access to talk to cluster layer
  97 static bool need_root_group = true;
  98 
  99 /* When contacted via pacemakerd-api by a client having sbd in
 100  * the name we assume it is sbd-daemon which wants to know
 101  * if pacemakerd shutdown gracefully.
 102  * Thus when everything is shutdown properly pacemakerd
 103  * waits till it has reported the graceful completion of
 104  * shutdown to sbd and just when sbd-client closes the
 105  * connection we can assume that the report has arrived
 106  * properly so that pacemakerd can finally exit.
 107  * Following two variables are used to track that handshake.
 108  */
 109 unsigned int shutdown_complete_state_reported_to = 0;
 110 gboolean shutdown_complete_state_reported_client_closed = FALSE;
 111 
 112 /* state we report when asked via pacemakerd-api status-ping */
 113 const char *pacemakerd_state = PCMK__VALUE_INIT;
 114 gboolean running_with_sbd = FALSE; /* local copy */
 115 
 116 GMainLoop *mainloop = NULL;
 117 
 118 static gboolean fatal_error = FALSE;
 119 
 120 static int child_liveness(pcmk_child_t *child);
 121 static gboolean escalate_shutdown(gpointer data);
 122 static int start_child(pcmk_child_t * child);
 123 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
 124 static void pcmk_process_exit(pcmk_child_t * child);
 125 static gboolean pcmk_shutdown_worker(gpointer user_data);
 126 static gboolean stop_child(pcmk_child_t * child, int signal);
 127 
 128 static bool
 129 pcmkd_cluster_connected(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 130 {
 131 #if SUPPORT_COROSYNC
 132     return pcmkd_corosync_connected();
 133 #else
 134     return true;
 135 #endif
 136 }
 137 
 138 static gboolean
 139 check_next_subdaemon(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 140 {
 141     static int next_child = 0;
 142 
 143     pcmk_child_t *child = &(pcmk_children[next_child]);
 144     const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
 145     int rc = child_liveness(child);
 146 
 147     crm_trace("Checked %s[%lld]: %s (%d)",
 148               child->name, pid, pcmk_rc_str(rc), rc);
 149 
 150     switch (rc) {
 151         case pcmk_rc_ok:
 152             child->check_count = 0;
 153             subdaemon_check_progress = time(NULL);
 154             break;
 155 
 156         case pcmk_rc_ipc_pid_only: // Child was previously OK
 157             if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
 158                 crm_crit("%s[%lld] is unresponsive to IPC after %d attempt%s "
 159                          "and will now be killed",
 160                          child->name, pid, child->check_count,
 161                          pcmk__plural_s(child->check_count));
 162                 stop_child(child, SIGKILL);
 163                 if (pcmk_is_set(child->flags, child_respawn)) {
 164                     // Respawn limit hasn't been reached, so retry another round
 165                     child->check_count = 0;
 166                 }
 167             } else {
 168                 crm_notice("%s[%lld] is unresponsive to IPC after %d attempt%s",
 169                            child->name, pid, child->check_count,
 170                            pcmk__plural_s(child->check_count));
 171                 if (pcmk_is_set(child->flags, child_respawn)) {
 172                     /* as long as the respawn-limit isn't reached
 173                        and we haven't run out of connect retries
 174                        we account this as progress we are willing
 175                        to tell to sbd
 176                      */
 177                     subdaemon_check_progress = time(NULL);
 178                 }
 179             }
 180             /* go to the next child and see if
 181                we can make progress there
 182              */
 183             break;
 184         case pcmk_rc_ipc_unresponsive:
 185             if (!pcmk_is_set(child->flags, child_respawn)) {
 186                 /* if a subdaemon is down and we don't want it
 187                    to be restarted this is a success during
 188                    shutdown. if it isn't restarted anymore
 189                    due to MAX_RESPAWN it is
 190                    rather no success.
 191                  */
 192                 if (child->respawn_count <= MAX_RESPAWN) {
 193                     subdaemon_check_progress = time(NULL);
 194                 }
 195             }
 196             if (!pcmk_is_set(child->flags, child_active_before_startup)) {
 197                 crm_trace("%s[%lld] terminated (relying on SIGCHLD handler)",
 198                           child->name, pid);
 199                 break;
 200             }
 201             if (pcmk_is_set(child->flags, child_respawn)) {
 202                 crm_err("%s[%lld] terminated", child->name, pid);
 203             } else {
 204                 /* orderly shutdown */
 205                 crm_notice("%s[%lld] terminated", child->name, pid);
 206             }
 207             pcmk_process_exit(child);
 208             break;
 209         default:
 210             crm_exit(CRM_EX_FATAL);
 211             break;  /* static analysis/noreturn */
 212     }
 213 
 214     if (++next_child >= PCMK__NELEM(pcmk_children)) {
 215         next_child = 0;
 216     }
 217 
 218     return G_SOURCE_CONTINUE;
 219 }
 220 
 221 static gboolean
 222 escalate_shutdown(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 223 {
 224     pcmk_child_t *child = data;
 225 
 226     if (child->pid == PCMK__SPECIAL_PID) {
 227         pcmk_process_exit(child);
 228 
 229     } else if (child->pid != 0) {
 230         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
 231         crm_err("Child %s not terminating in a timely manner, forcing", child->name);
 232         stop_child(child, SIGSEGV);
 233     }
 234     return FALSE;
 235 }
 236 
 237 static void
 238 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 239 {
 240     pcmk_child_t *child = mainloop_child_userdata(p);
 241     const char *name = mainloop_child_name(p);
 242 
 243     if (signo) {
 244         do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
 245                    "%s[%d] terminated with signal %d (%s)%s",
 246                    name, pid, signo, strsignal(signo),
 247                    (core? " and dumped core" : ""));
 248 
 249     } else {
 250         switch(exitcode) {
 251             case CRM_EX_OK:
 252                 crm_info("%s[%d] exited with status %d (%s)",
 253                          name, pid, exitcode, crm_exit_str(exitcode));
 254                 break;
 255 
 256             case CRM_EX_FATAL:
 257                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
 258                          name, pid);
 259                 child->flags &= ~child_respawn;
 260                 fatal_error = TRUE;
 261                 pcmk_shutdown(SIGTERM);
 262                 break;
 263 
 264             case CRM_EX_PANIC:
 265                 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
 266                 child->flags &= ~child_respawn;
 267                 fatal_error = TRUE;
 268                 pcmk__panic(__func__);
 269                 pcmk_shutdown(SIGTERM);
 270                 break;
 271 
 272             default:
 273                 crm_err("%s[%d] exited with status %d (%s)",
 274                         name, pid, exitcode, crm_exit_str(exitcode));
 275                 break;
 276         }
 277     }
 278 
 279     pcmk_process_exit(child);
 280 }
 281 
 282 static void
 283 pcmk_process_exit(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 284 {
 285     child->pid = 0;
 286     child->flags &= ~child_active_before_startup;
 287     child->check_count = 0;
 288 
 289     child->respawn_count += 1;
 290     if (child->respawn_count > MAX_RESPAWN) {
 291         crm_err("Child respawn count exceeded by %s", child->name);
 292         child->flags &= ~child_respawn;
 293     }
 294 
 295     if (shutdown_trigger) {
 296         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
 297         mainloop_set_trigger(shutdown_trigger);
 298 
 299     } else if (!pcmk_is_set(child->flags, child_respawn)) {
 300         /* nothing to do */
 301 
 302     } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
 303         crm_err("Rebooting system because of %s", child->name);
 304         pcmk__panic(__func__);
 305 
 306     } else if (child_liveness(child) == pcmk_rc_ok) {
 307         crm_warn("One-off suppressing strict respawning of a child process %s,"
 308                  " appears alright per %s IPC end-point",
 309                  child->name, child->endpoint);
 310 
 311     } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
 312         crm_notice("Not respawning %s subdaemon until cluster returns",
 313                    child->name);
 314         child->flags |= child_needs_retry;
 315 
 316     } else {
 317         crm_notice("Respawning %s subdaemon after unexpected exit",
 318                    child->name);
 319         start_child(child);
 320     }
 321 }
 322 
 323 static gboolean
 324 pcmk_shutdown_worker(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 325 {
 326     static int phase = PCMK__NELEM(pcmk_children) - 1;
 327     static time_t next_log = 0;
 328 
 329     if (phase == PCMK__NELEM(pcmk_children) - 1) {
 330         crm_notice("Shutting down Pacemaker");
 331         pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
 332     }
 333 
 334     for (; phase >= 0; phase--) {
 335         pcmk_child_t *child = &(pcmk_children[phase]);
 336 
 337         if (child->pid != 0) {
 338             time_t now = time(NULL);
 339 
 340             if (pcmk_is_set(child->flags, child_respawn)) {
 341                 if (child->pid == PCMK__SPECIAL_PID) {
 342                     crm_warn("The process behind %s IPC cannot be"
 343                              " terminated, so either wait the graceful"
 344                              " period of %ld s for its native termination"
 345                              " if it vitally depends on some other daemons"
 346                              " going down in a controlled way already,"
 347                              " or locate and kill the correct %s process"
 348                              " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1"
 349                              " to avoid this altogether next time around",
 350                              child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
 351                              child->command);
 352                 }
 353                 next_log = now + 30;
 354                 child->flags &= ~child_respawn;
 355                 stop_child(child, SIGTERM);
 356                 if (phase < PCMK_CHILD_CONTROLD) {
 357                     g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
 358                                   escalate_shutdown, child);
 359                 }
 360 
 361             } else if (now >= next_log) {
 362                 next_log = now + 30;
 363                 crm_notice("Still waiting for %s to terminate "
 364                            CRM_XS " pid=%lld",
 365                            child->name, (long long) child->pid);
 366             }
 367             return TRUE;
 368         }
 369 
 370         /* cleanup */
 371         crm_debug("%s confirmed stopped", child->name);
 372         child->pid = 0;
 373     }
 374 
 375     crm_notice("Shutdown complete");
 376     pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
 377     if (!fatal_error && running_with_sbd &&
 378         pcmk__get_sbd_sync_resource_startup() &&
 379         !shutdown_complete_state_reported_client_closed) {
 380         crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
 381         return TRUE;
 382     }
 383 
 384     // @COMPAT Drop shutdown delay at 3.0.0
 385     {
 386         const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
 387         if(delay) {
 388             long long delay_ms = crm_get_msec(delay);
 389 
 390             sync();
 391             if (delay_ms > 0) {
 392                 pcmk__sleep_ms((unsigned int) QB_MIN(delay_ms, UINT_MAX));
 393             }
 394         }
 395     }
 396 
 397     g_main_loop_quit(mainloop);
 398 
 399     if (fatal_error) {
 400         crm_notice("Shutting down and staying down after fatal error");
 401 #ifdef SUPPORT_COROSYNC
 402         pcmkd_shutdown_corosync();
 403 #endif
 404         crm_exit(CRM_EX_FATAL);
 405     }
 406 
 407     return TRUE;
 408 }
 409 
 410 /* TODO once libqb is taught to juggle with IPC end-points carried over as
 411         bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
 412         it shall hand over these descriptors here if/once they are successfully
 413         pre-opened in (presumably) child_liveness(), to avoid any remaining
 414         room for races */
 415  // \return Standard Pacemaker return code
 416 static int
 417 start_child(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 418 {
 419     uid_t uid = 0;
 420     gid_t gid = 0;
 421     gboolean use_valgrind = FALSE;
 422     gboolean use_callgrind = FALSE;
 423     const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
 424     const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
 425 
 426     child->flags &= ~child_active_before_startup;
 427     child->check_count = 0;
 428 
 429     if (child->command == NULL) {
 430         crm_info("Nothing to do for child \"%s\"", child->name);
 431         return pcmk_rc_ok;
 432     }
 433 
 434     if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
 435         use_callgrind = TRUE;
 436         use_valgrind = TRUE;
 437 
 438     } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
 439         use_callgrind = TRUE;
 440         use_valgrind = TRUE;
 441 
 442     } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
 443         use_valgrind = TRUE;
 444 
 445     } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
 446         use_valgrind = TRUE;
 447     }
 448 
 449     if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
 450         crm_warn("Cannot enable valgrind for %s:"
 451                  " The location of the valgrind binary is unknown", child->name);
 452         use_valgrind = FALSE;
 453     }
 454 
 455     if (child->uid) {
 456         if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
 457             crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
 458             return EACCES;
 459         }
 460         crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
 461     }
 462 
 463     child->pid = fork();
 464     CRM_ASSERT(child->pid != -1);
 465 
 466     if (child->pid > 0) {
 467         /* parent */
 468         mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
 469 
 470         crm_info("Forked child %lld for process %s%s",
 471                  (long long) child->pid, child->name,
 472                  use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
 473         return pcmk_rc_ok;
 474 
 475     } else {
 476         /* Start a new session */
 477         (void)setsid();
 478 
 479         /* Setup the two alternate arg arrays */
 480         opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
 481         if (use_callgrind) {
 482             opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
 483             opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
 484                                             CRM_STATE_DIR "/callgrind.out.%p");
 485             opts_vgrind[3] = pcmk__str_copy(child->command);
 486             opts_vgrind[4] = NULL;
 487         } else {
 488             opts_vgrind[1] = pcmk__str_copy(child->command);
 489             opts_vgrind[2] = NULL;
 490             opts_vgrind[3] = NULL;
 491             opts_vgrind[4] = NULL;
 492         }
 493         opts_default[0] = pcmk__str_copy(child->command);
 494 
 495         if(gid) {
 496             // Drop root group access if not needed
 497             if (!need_root_group && (setgid(gid) < 0)) {
 498                 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
 499             }
 500 
 501             /* Initialize supplementary groups to only those always granted to
 502              * the user, plus haclient (so we can access IPC).
 503              */
 504             if (initgroups(child->uid, gid) < 0) {
 505                 crm_err("Cannot initialize groups for %s: %s (%d)",
 506                         child->uid, pcmk_rc_str(errno), errno);
 507             }
 508         }
 509 
 510         if (uid && setuid(uid) < 0) {
 511             crm_warn("Could not set user to %s (id %d): %s",
 512                      child->uid, uid, strerror(errno));
 513         }
 514 
 515         pcmk__close_fds_in_child(true);
 516 
 517         pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
 518         pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
 519         pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
 520 
 521         if (use_valgrind) {
 522             (void)execvp(VALGRIND_BIN, opts_vgrind);
 523         } else {
 524             (void)execvp(child->command, opts_default);
 525         }
 526         crm_crit("Could not execute %s: %s", child->command, strerror(errno));
 527         crm_exit(CRM_EX_FATAL);
 528     }
 529     return pcmk_rc_ok;          /* never reached */
 530 }
 531 
 532 /*!
 533  * \internal
 534  * \brief Check the liveness of the child based on IPC name and PID if tracked
 535  *
 536  * \param[in,out] child  Child tracked data
 537  *
 538  * \return Standard Pacemaker return code
 539  *
 540  * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 541  *       indicating that no trace of IPC liveness was detected,
 542  *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
 543  *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
 544  *       the child is up by PID but not IPC end-point (possibly starting).
 545  * \note This function doesn't modify any of \p child members but \c pid,
 546  *       and is not actively toying with processes as such but invoking
 547  *       \c stop_child in one particular case (there's for some reason
 548  *       a different authentic holder of the IPC end-point).
 549  */
 550 static int
 551 child_liveness(pcmk_child_t *child)
     /* [previous][next][first][last][top][bottom][index][help] */
 552 {
 553     uid_t cl_uid = 0;
 554     gid_t cl_gid = 0;
 555     const uid_t root_uid = 0;
 556     const gid_t root_gid = 0;
 557     const uid_t *ref_uid;
 558     const gid_t *ref_gid;
 559     int rc = pcmk_rc_ipc_unresponsive;
 560     pid_t ipc_pid = 0;
 561 
 562     if (child->endpoint == NULL
 563             && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
 564         crm_err("Cannot track child %s for missing both API end-point and PID",
 565                 child->name);
 566         rc = EINVAL; // Misuse of function when child is not trackable
 567 
 568     } else if (child->endpoint != NULL) {
 569         int legacy_rc = pcmk_ok;
 570 
 571         if (child->uid == NULL) {
 572             ref_uid = &root_uid;
 573             ref_gid = &root_gid;
 574         } else {
 575             ref_uid = &cl_uid;
 576             ref_gid = &cl_gid;
 577             legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
 578         }
 579 
 580         if (legacy_rc < 0) {
 581             rc = pcmk_legacy2rc(legacy_rc);
 582             crm_err("Could not find user and group IDs for user %s: %s "
 583                     CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
 584         } else {
 585             rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
 586                                                        *ref_uid, *ref_gid,
 587                                                        &ipc_pid);
 588             if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
 589                 if (child->pid <= 0) {
 590                     /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
 591                      * initializes a new child. If rc is
 592                      * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
 593                      * investigate further.
 594                      */
 595                     child->pid = ipc_pid;
 596                 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
 597                     /* An unexpected (but authorized) process is responding to
 598                      * IPC. Investigate further.
 599                      */
 600                     rc = pcmk_rc_ipc_unresponsive;
 601                 }
 602             }
 603         }
 604     }
 605 
 606     if (rc == pcmk_rc_ipc_unresponsive) {
 607         /* If we get here, a child without IPC is being tracked, no IPC liveness
 608          * has been detected, or IPC liveness has been detected with an
 609          * unexpected (but authorized) process. This is safe on FreeBSD since
 610          * the only change possible from a proper child's PID into "special" PID
 611          * of 1 behind more loosely related process.
 612          */
 613         int ret = pcmk__pid_active(child->pid, child->name);
 614 
 615         if (ipc_pid && ((ret != pcmk_rc_ok)
 616                         || ipc_pid == PCMK__SPECIAL_PID
 617                         || (pcmk__pid_active(ipc_pid,
 618                                              child->name) == pcmk_rc_ok))) {
 619             /* An unexpected (but authorized) process was detected at the IPC
 620              * endpoint, and either it is active, or the child we're tracking is
 621              * not.
 622              */
 623 
 624             if (ret == pcmk_rc_ok) {
 625                 /* The child we're tracking is active. Kill it, and adopt the
 626                  * detected process. This assumes that our children don't fork
 627                  * (thus getting a different PID owning the IPC), but rather the
 628                  * tracking got out of sync because of some means external to
 629                  * Pacemaker, and adopting the detected process is better than
 630                  * killing it and possibly having to spawn a new child.
 631                  */
 632                 /* not possessing IPC, afterall (what about corosync CPG?) */
 633                 stop_child(child, SIGKILL);
 634             }
 635             rc = pcmk_rc_ok;
 636             child->pid = ipc_pid;
 637         } else if (ret == pcmk_rc_ok) {
 638             // Our tracked child's PID was found active, but not its IPC
 639             rc = pcmk_rc_ipc_pid_only;
 640         } else if ((child->pid == 0) && (ret == EINVAL)) {
 641             // FreeBSD can return EINVAL
 642             rc = pcmk_rc_ipc_unresponsive;
 643         } else {
 644             switch (ret) {
 645                 case EACCES:
 646                     rc = pcmk_rc_ipc_unauthorized;
 647                     break;
 648                 case ESRCH:
 649                     rc = pcmk_rc_ipc_unresponsive;
 650                     break;
 651                 default:
 652                     rc = ret;
 653                     break;
 654             }
 655         }
 656     }
 657     return rc;
 658 }
 659 
 660 /*!
 661  * \internal
 662  * \brief Initial one-off check of the pre-existing "child" processes
 663  *
 664  * With "child" process, we mean the subdaemon that defines an API end-point
 665  * (all of them do as of the comment) -- the possible complement is skipped
 666  * as it is deemed it has no such shared resources to cause conflicts about,
 667  * hence it can presumably be started anew without hesitation.
 668  * If that won't hold true in the future, the concept of a shared resource
 669  * will have to be generalized beyond the API end-point.
 670  *
 671  * For boundary cases that the "child" is still starting (IPC end-point is yet
 672  * to be witnessed), or more rarely (practically FreeBSD only), when there's
 673  * a pre-existing "untrackable" authentic process, we give the situation some
 674  * time to possibly unfold in the right direction, meaning that said socket
 675  * will appear or the unattainable process will disappear per the observable
 676  * IPC, respectively.
 677  *
 678  * \return Standard Pacemaker return code
 679  *
 680  * \note Since this gets run at the very start, \c respawn_count fields
 681  *       for particular children get temporarily overloaded with "rounds
 682  *       of waiting" tracking, restored once we are about to finish with
 683  *       success (i.e. returning value >=0) and will remain unrestored
 684  *       otherwise.  One way to suppress liveness detection logic for
 685  *       particular child is to set the said value to a negative number.
 686  */
 687 #define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
 688 int
 689 find_and_track_existing_processes(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 690 {
 691     bool wait_in_progress;
 692     int rc;
 693     size_t i, rounds;
 694 
 695     for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
 696         wait_in_progress = false;
 697         for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 698 
 699             if ((pcmk_children[i].endpoint == NULL)
 700                 || (pcmk_children[i].respawn_count < 0)) {
 701                 continue;
 702             }
 703 
 704             rc = child_liveness(&pcmk_children[i]);
 705             if (rc == pcmk_rc_ipc_unresponsive) {
 706                 /* As a speculation, don't give up if there are more rounds to
 707                  * come for other reasons, but don't artificially wait just
 708                  * because of this, since we would preferably start ASAP.
 709                  */
 710                 continue;
 711             }
 712 
 713             // @TODO Functionize more of this to reduce nesting
 714             pcmk_children[i].respawn_count = rounds;
 715             switch (rc) {
 716                 case pcmk_rc_ok:
 717                     if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
 718                         if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
 719                             crm_crit("Cannot reliably track pre-existing"
 720                                      " authentic process behind %s IPC on this"
 721                                      " platform and PCMK_" PCMK__ENV_FAIL_FAST
 722                                      " requested",
 723                                      pcmk_children[i].endpoint);
 724                             return EOPNOTSUPP;
 725                         } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 726                             crm_notice("Assuming pre-existing authentic, though"
 727                                        " on this platform untrackable, process"
 728                                        " behind %s IPC is stable (was in %d"
 729                                        " previous samples) so rather than"
 730                                        " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
 731                                        " not requested), we just switch to a"
 732                                        " less optimal IPC liveness monitoring"
 733                                        " (not very suitable for heavy load)",
 734                                        pcmk_children[i].name, WAIT_TRIES - 1);
 735                             crm_warn("The process behind %s IPC cannot be"
 736                                      " terminated, so the overall shutdown"
 737                                      " will get delayed implicitly (%ld s),"
 738                                      " which serves as a graceful period for"
 739                                      " its native termination if it vitally"
 740                                      " depends on some other daemons going"
 741                                      " down in a controlled way already",
 742                                      pcmk_children[i].name,
 743                                      (long) SHUTDOWN_ESCALATION_PERIOD);
 744                         } else {
 745                             wait_in_progress = true;
 746                             crm_warn("Cannot reliably track pre-existing"
 747                                      " authentic process behind %s IPC on this"
 748                                      " platform, can still disappear in %d"
 749                                      " attempt(s)", pcmk_children[i].endpoint,
 750                                      WAIT_TRIES - pcmk_children[i].respawn_count);
 751                             continue;
 752                         }
 753                     }
 754                     crm_notice("Tracking existing %s process (pid=%lld)",
 755                                pcmk_children[i].name,
 756                                (long long) PCMK__SPECIAL_PID_AS_0(
 757                                                pcmk_children[i].pid));
 758                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
 759                     pcmk_children[i].flags |= child_active_before_startup;
 760                     break;
 761                 case pcmk_rc_ipc_pid_only:
 762                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 763                         crm_crit("%s IPC end-point for existing authentic"
 764                                  " process %lld did not (re)appear",
 765                                  pcmk_children[i].endpoint,
 766                                  (long long) PCMK__SPECIAL_PID_AS_0(
 767                                                  pcmk_children[i].pid));
 768                         return rc;
 769                     }
 770                     wait_in_progress = true;
 771                     crm_warn("Cannot find %s IPC end-point for existing"
 772                              " authentic process %lld, can still (re)appear"
 773                              " in %d attempts (?)",
 774                              pcmk_children[i].endpoint,
 775                              (long long) PCMK__SPECIAL_PID_AS_0(
 776                                              pcmk_children[i].pid),
 777                              WAIT_TRIES - pcmk_children[i].respawn_count);
 778                     continue;
 779                 default:
 780                     crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
 781                              pcmk_children[i].name, pcmk_rc_str(rc), rc);
 782                     return rc;
 783             }
 784         }
 785         if (!wait_in_progress) {
 786             break;
 787         }
 788         pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
 789     }
 790     for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 791         pcmk_children[i].respawn_count = 0;  /* restore pristine state */
 792     }
 793 
 794     g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
 795                           NULL);
 796     return pcmk_rc_ok;
 797 }
 798 
 799 gboolean
 800 init_children_processes(void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 801 {
 802     if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
 803         /* Corosync clusters can drop root group access, because we set
 804          * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
 805          * to corosync.
 806          */
 807         need_root_group = false;
 808     }
 809 
 810     /* start any children that have not been detected */
 811     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 812         if (pcmk_children[i].pid != 0) {
 813             /* we are already tracking it */
 814             continue;
 815         }
 816 
 817         start_child(&(pcmk_children[i]));
 818     }
 819 
 820     /* From this point on, any daemons being started will be due to
 821      * respawning rather than node start.
 822      *
 823      * This may be useful for the daemons to know
 824      */
 825     pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
 826     pacemakerd_state = PCMK__VALUE_RUNNING;
 827     return TRUE;
 828 }
 829 
 830 void
 831 pcmk_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 832 {
 833     if (shutdown_trigger == NULL) {
 834         shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
 835     }
 836     mainloop_set_trigger(shutdown_trigger);
 837 }
 838 
 839 void
 840 restart_cluster_subdaemons(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 841 {
 842     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 843         if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
 844             continue;
 845         }
 846 
 847         crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
 848         if (start_child(&pcmk_children[i])) {
 849             pcmk_children[i].flags &= ~child_needs_retry;
 850         }
 851     }
 852 }
 853 
 854 static gboolean
 855 stop_child(pcmk_child_t * child, int signal)
     /* [previous][next][first][last][top][bottom][index][help] */
 856 {
 857     if (signal == 0) {
 858         signal = SIGTERM;
 859     }
 860 
 861     /* why to skip PID of 1?
 862        - FreeBSD ~ how untrackable process behind IPC is masqueraded as
 863        - elsewhere: how "init" task is designated; in particular, in systemd
 864          arrangement of socket-based activation, this is pretty real */
 865     if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
 866         crm_debug("Nothing to do for child \"%s\" (process %lld)",
 867                   child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
 868         return TRUE;
 869     }
 870 
 871     if (child->pid <= 0) {
 872         crm_trace("Client %s not running", child->name);
 873         return TRUE;
 874     }
 875 
 876     errno = 0;
 877     if (kill(child->pid, signal) == 0) {
 878         crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
 879                    child->name, signal, (long long) child->pid);
 880 
 881     } else {
 882         crm_err("Could not stop %s (process %lld) with signal %d: %s",
 883                 child->name, (long long) child->pid, signal, strerror(errno));
 884     }
 885 
 886     return TRUE;
 887 }
 888 

/* [previous][next][first][last][top][bottom][index][help] */