root/daemons/pacemakerd/pcmkd_subdaemons.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check_active_before_startup_processes
  2. escalate_shutdown
  3. pcmk_child_exit
  4. pcmk_process_exit
  5. pcmk_shutdown_worker
  6. start_child
  7. child_liveness
  8. find_and_track_existing_processes
  9. init_children_processes
  10. pcmk_shutdown
  11. stop_child

   1 /*
   2  * Copyright 2010-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #include <errno.h>
  14 #include <grp.h>
  15 #include <signal.h>
  16 #include <stdbool.h>
  17 #include <stdlib.h>
  18 #include <string.h>
  19 #include <sys/types.h>
  20 #include <time.h>
  21 #include <unistd.h>
  22 
  23 #include <crm/cluster.h>
  24 #include <crm/msg_xml.h>
  25 
  26 #define PCMK_PROCESS_CHECK_INTERVAL 5
  27 #define SHUTDOWN_ESCALATION_PERIOD 180000  /* 3m */
  28 
  29 /* Index into the array below */
  30 #define PCMK_CHILD_CONTROLD  3
  31 
  32 static pcmk_child_t pcmk_children[] = {
  33     {
  34         0, 0, 0, FALSE, "none", NULL, NULL, NULL
  35     },
  36     {
  37         0, 3, 0, TRUE,  "pacemaker-execd", NULL,
  38         CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD
  39     },
  40     {
  41         0, 1, 0, TRUE,  "pacemaker-based", CRM_DAEMON_USER,
  42         CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO
  43     },
  44     {
  45         0, 6, 0, TRUE, "pacemaker-controld", CRM_DAEMON_USER,
  46         CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD
  47     },
  48     {
  49         0, 4, 0, TRUE, "pacemaker-attrd", CRM_DAEMON_USER,
  50         CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD
  51     },
  52     {
  53         0, 5, 0, TRUE, "pacemaker-schedulerd", CRM_DAEMON_USER,
  54         CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE
  55     },
  56     {
  57         0, 2, 0, TRUE, "pacemaker-fenced", NULL,
  58         CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng"
  59     },
  60 };
  61 
  62 static char *opts_default[] = { NULL, NULL };  /* argv for a plain exec; slot 0 is filled in start_child() */
  63 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };  /* argv when running a child under valgrind/callgrind */
  64 
  65 crm_trigger_t *shutdown_trigger = NULL;  /* when set, pcmk_process_exit() fires it to resume the shutdown */
  66 crm_trigger_t *startup_trigger = NULL;  /* not used in this part of the file */
  67 
  68 /* When contacted via the pacemakerd API by a client with "sbd" in
  69  * its name, we assume it is the sbd daemon wanting to know
  70  * whether pacemakerd shut down gracefully.
  71  * Thus, once everything has been shut down properly, pacemakerd
  72  * waits until it has reported the graceful completion of
  73  * shutdown to sbd; only when the sbd client closes the
  74  * connection can we assume that the report has arrived
  75  * properly, so that pacemakerd can finally exit.
  76  * The following two variables are used to track that handshake.
  77  */
  78 unsigned int shutdown_complete_state_reported_to = 0;
  79 gboolean shutdown_complete_state_reported_client_closed = FALSE;
  80 
  81 /* State we report when asked via a pacemakerd API status ping */
  82 const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
  83 gboolean running_with_sbd = FALSE; /* local copy */
  84 
  85 GMainLoop *mainloop = NULL;  /* quit by pcmk_shutdown_worker() once shutdown is complete */
  86 
  87 static gboolean fatal_error = FALSE;  /* set when a child exits with CRM_EX_FATAL or CRM_EX_PANIC */
  88 static bool global_keep_tracking = false;  /* last result of check_active_before_startup_processes(); also set when its timer is (re)armed */
  89 
  90 static gboolean check_active_before_startup_processes(gpointer user_data);
  91 static int child_liveness(pcmk_child_t *child);
  92 static gboolean escalate_shutdown(gpointer data);
  93 static gboolean start_child(pcmk_child_t * child);
  94 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
  95 static void pcmk_process_exit(pcmk_child_t * child);
  96 static gboolean pcmk_shutdown_worker(gpointer user_data);
  97 static gboolean stop_child(pcmk_child_t * child, int signal);
  98 
  99 static gboolean
 100 check_active_before_startup_processes(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 101 {
 102     int start_seq = 1, lpc = 0;
 103     static int max = SIZEOF(pcmk_children);
 104     gboolean keep_tracking = FALSE;
 105 
 106     for (start_seq = 1; start_seq < max; start_seq++) {
 107         for (lpc = 0; lpc < max; lpc++) {
 108             if (pcmk_children[lpc].active_before_startup == FALSE) {
 109                 /* we are already tracking it as a child process. */
 110                 continue;
 111             } else if (start_seq != pcmk_children[lpc].start_seq) {
 112                 continue;
 113             } else {
 114                 int rc = child_liveness(&pcmk_children[lpc]);
 115 
 116                 switch (rc) {
 117                     case pcmk_rc_ok:
 118                         break;
 119                     case pcmk_rc_ipc_unresponsive:
 120                     case pcmk_rc_ipc_pid_only: // This case: it was previously OK
 121                         if (pcmk_children[lpc].respawn == TRUE) {
 122                             crm_err("%s[%lld] terminated%s", pcmk_children[lpc].name,
 123                                     (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
 124                                     (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
 125                         } else {
 126                             /* orderly shutdown */
 127                             crm_notice("%s[%lld] terminated%s", pcmk_children[lpc].name,
 128                                        (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
 129                                        (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
 130                         }
 131                         pcmk_process_exit(&(pcmk_children[lpc]));
 132                         continue;
 133                     default:
 134                         crm_exit(CRM_EX_FATAL);
 135                         break;  /* static analysis/noreturn */
 136                 }
 137             }
 138             /* at least one of the processes found at startup
 139              * is still going, so keep this recurring timer around */
 140             keep_tracking = TRUE;
 141         }
 142     }
 143 
 144     global_keep_tracking = keep_tracking;
 145     return keep_tracking;
 146 }
 147 
 148 static gboolean
 149 escalate_shutdown(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 150 {
 151     pcmk_child_t *child = data;
 152 
 153     if (child->pid == PCMK__SPECIAL_PID) {
 154         pcmk_process_exit(child);
 155 
 156     } else if (child->pid != 0) {
 157         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
 158         crm_err("Child %s not terminating in a timely manner, forcing", child->name);
 159         stop_child(child, SIGSEGV);
 160     }
 161     return FALSE;
 162 }
 163 
 164 static void
 165 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 166 {
 167     pcmk_child_t *child = mainloop_child_userdata(p);
 168     const char *name = mainloop_child_name(p);
 169 
 170     if (signo) {
 171         do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
 172                    "%s[%d] terminated with signal %d (core=%d)",
 173                    name, pid, signo, core);
 174 
 175     } else {
 176         switch(exitcode) {
 177             case CRM_EX_OK:
 178                 crm_info("%s[%d] exited with status %d (%s)",
 179                          name, pid, exitcode, crm_exit_str(exitcode));
 180                 break;
 181 
 182             case CRM_EX_FATAL:
 183                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
 184                          name, pid);
 185                 child->respawn = FALSE;
 186                 fatal_error = TRUE;
 187                 pcmk_shutdown(SIGTERM);
 188                 break;
 189 
 190             case CRM_EX_PANIC:
 191                 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
 192                 child->respawn = FALSE;
 193                 fatal_error = TRUE;
 194                 pcmk__panic(__func__);
 195                 pcmk_shutdown(SIGTERM);
 196                 break;
 197 
 198             default:
 199                 crm_err("%s[%d] exited with status %d (%s)",
 200                         name, pid, exitcode, crm_exit_str(exitcode));
 201                 break;
 202         }
 203     }
 204 
 205     pcmk_process_exit(child);
 206 }
 207 
 208 static void
 209 pcmk_process_exit(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 210 {
 211     child->pid = 0;
 212     child->active_before_startup = FALSE;
 213 
 214     child->respawn_count += 1;
 215     if (child->respawn_count > MAX_RESPAWN) {
 216         crm_err("Child respawn count exceeded by %s", child->name);
 217         child->respawn = FALSE;
 218     }
 219 
 220     if (shutdown_trigger) {
 221         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
 222         mainloop_set_trigger(shutdown_trigger);
 223 
 224     } else if (!child->respawn) {
 225         /* nothing to do */
 226 
 227     } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
 228         crm_err("Rebooting system because of %s", child->name);
 229         pcmk__panic(__func__);
 230 
 231     } else if (child_liveness(child) == pcmk_rc_ok) {
 232         crm_warn("One-off suppressing strict respawning of a child process %s,"
 233                  " appears alright per %s IPC end-point",
 234                  child->name, child->endpoint);
 235         /* need to monitor how it evolves, and start new process if badly */
 236         child->active_before_startup = TRUE;
 237         if (!global_keep_tracking) {
 238             global_keep_tracking = true;
 239             g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
 240                                   check_active_before_startup_processes, NULL);
 241         }
 242 
 243     } else {
 244         crm_notice("Respawning failed child process: %s", child->name);
 245         start_child(child);
 246     }
 247 }
 248 
 249 static gboolean
 250 pcmk_shutdown_worker(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 251 {
 252     static int phase = SIZEOF(pcmk_children);
 253     static time_t next_log = 0;
 254 
 255     int lpc = 0;
 256 
 257     if (phase == SIZEOF(pcmk_children)) {
 258         crm_notice("Shutting down Pacemaker");
 259         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
 260     }
 261 
 262     for (; phase > 0; phase--) {
 263         /* Don't stop anything with start_seq < 1 */
 264 
 265         for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
 266             pcmk_child_t *child = &(pcmk_children[lpc]);
 267 
 268             if (phase != child->start_seq) {
 269                 continue;
 270             }
 271 
 272             if (child->pid != 0) {
 273                 time_t now = time(NULL);
 274 
 275                 if (child->respawn) {
 276                     if (child->pid == PCMK__SPECIAL_PID) {
 277                         crm_warn("The process behind %s IPC cannot be"
 278                                  " terminated, so either wait the graceful"
 279                                  " period of %ld s for its native termination"
 280                                  " if it vitally depends on some other daemons"
 281                                  " going down in a controlled way already,"
 282                                  " or locate and kill the correct %s process"
 283                                  " on your own; set PCMK_fail_fast=1 to avoid"
 284                                  " this altogether next time around",
 285                                  child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
 286                                  child->command);
 287                     }
 288                     next_log = now + 30;
 289                     child->respawn = FALSE;
 290                     stop_child(child, SIGTERM);
 291                     if (phase < pcmk_children[PCMK_CHILD_CONTROLD].start_seq) {
 292                         g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
 293                                       escalate_shutdown, child);
 294                     }
 295 
 296                 } else if (now >= next_log) {
 297                     next_log = now + 30;
 298                     crm_notice("Still waiting for %s to terminate "
 299                                CRM_XS " pid=%lld seq=%d",
 300                                child->name, (long long) child->pid,
 301                                child->start_seq);
 302                 }
 303                 return TRUE;
 304             }
 305 
 306             /* cleanup */
 307             crm_debug("%s confirmed stopped", child->name);
 308             child->pid = 0;
 309         }
 310     }
 311 
 312     crm_notice("Shutdown complete");
 313     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
 314     if (!fatal_error && running_with_sbd &&
 315         pcmk__get_sbd_sync_resource_startup() &&
 316         !shutdown_complete_state_reported_client_closed) {
 317         crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
 318         return TRUE;
 319     }
 320 
 321     {
 322         const char *delay = pcmk__env_option("shutdown_delay");
 323         if(delay) {
 324             sync();
 325             pcmk__sleep_ms(crm_get_msec(delay));
 326         }
 327     }
 328 
 329     g_main_loop_quit(mainloop);
 330 
 331     if (fatal_error) {
 332         crm_notice("Shutting down and staying down after fatal error");
 333 #ifdef SUPPORT_COROSYNC
 334         pcmkd_shutdown_corosync();
 335 #endif
 336         crm_exit(CRM_EX_FATAL);
 337     }
 338 
 339     return TRUE;
 340 }
 341 
 342 /* TODO once libqb is taught to juggle with IPC end-points carried over as
 343         bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
 344         it shall hand over these descriptors here if/once they are successfully
 345         pre-opened in (presumably) child_liveness(), to avoid any remaining
 346         room for races */
 347 static gboolean
 348 start_child(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 349 {
 350     uid_t uid = 0;
 351     gid_t gid = 0;
 352     gboolean use_valgrind = FALSE;
 353     gboolean use_callgrind = FALSE;
 354     const char *env_valgrind = getenv("PCMK_valgrind_enabled");
 355     const char *env_callgrind = getenv("PCMK_callgrind_enabled");
 356 
 357     child->active_before_startup = FALSE;
 358 
 359     if (child->command == NULL) {
 360         crm_info("Nothing to do for child \"%s\"", child->name);
 361         return TRUE;
 362     }
 363 
 364     if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
 365         use_callgrind = TRUE;
 366         use_valgrind = TRUE;
 367 
 368     } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
 369         use_callgrind = TRUE;
 370         use_valgrind = TRUE;
 371 
 372     } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
 373         use_valgrind = TRUE;
 374 
 375     } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
 376         use_valgrind = TRUE;
 377     }
 378 
 379     if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
 380         crm_warn("Cannot enable valgrind for %s:"
 381                  " The location of the valgrind binary is unknown", child->name);
 382         use_valgrind = FALSE;
 383     }
 384 
 385     if (child->uid) {
 386         if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
 387             crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
 388             return FALSE;
 389         }
 390         crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
 391     }
 392 
 393     child->pid = fork();
 394     CRM_ASSERT(child->pid != -1);
 395 
 396     if (child->pid > 0) {
 397         /* parent */
 398         mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
 399 
 400         crm_info("Forked child %lld for process %s%s",
 401                  (long long) child->pid, child->name,
 402                  use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
 403         return TRUE;
 404 
 405     } else {
 406         /* Start a new session */
 407         (void)setsid();
 408 
 409         /* Setup the two alternate arg arrays */
 410         opts_vgrind[0] = strdup(VALGRIND_BIN);
 411         if (use_callgrind) {
 412             opts_vgrind[1] = strdup("--tool=callgrind");
 413             opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
 414             opts_vgrind[3] = strdup(child->command);
 415             opts_vgrind[4] = NULL;
 416         } else {
 417             opts_vgrind[1] = strdup(child->command);
 418             opts_vgrind[2] = NULL;
 419             opts_vgrind[3] = NULL;
 420             opts_vgrind[4] = NULL;
 421         }
 422         opts_default[0] = strdup(child->command);
 423 
 424         if(gid) {
 425             // Whether we need root group access to talk to cluster layer
 426             bool need_root_group = TRUE;
 427 
 428             if (is_corosync_cluster()) {
 429                 /* Corosync clusters can drop root group access, because we set
 430                  * uidgid.gid.${gid}=1 via CMAP, which allows these processes to
 431                  * connect to corosync.
 432                  */
 433                 need_root_group = FALSE;
 434             }
 435 
 436             // Drop root group access if not needed
 437             if (!need_root_group && (setgid(gid) < 0)) {
 438                 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
 439             }
 440 
 441             /* Initialize supplementary groups to only those always granted to
 442              * the user, plus haclient (so we can access IPC).
 443              */
 444             if (initgroups(child->uid, gid) < 0) {
 445                 crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
 446             }
 447         }
 448 
 449         if (uid && setuid(uid) < 0) {
 450             crm_warn("Could not set user to %s (id %d): %s",
 451                      child->uid, uid, strerror(errno));
 452         }
 453 
 454         pcmk__close_fds_in_child(true);
 455 
 456         pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
 457         pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
 458         pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
 459 
 460         if (use_valgrind) {
 461             (void)execvp(VALGRIND_BIN, opts_vgrind);
 462         } else {
 463             (void)execvp(child->command, opts_default);
 464         }
 465         crm_crit("Could not execute %s: %s", child->command, strerror(errno));
 466         crm_exit(CRM_EX_FATAL);
 467     }
 468     return TRUE;                /* never reached */
 469 }
 470 
 471 /*!
 472  * \internal
 473  * \brief Check the liveness of the child based on IPC name and PID if tracked
 474  *
 475  * \param[inout] child  Child tracked data
 476  *
 477  * \return Standard Pacemaker return code
 478  *
 479  * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 480  *       indicating that no trace of IPC liveness was detected,
 481  *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
 482  *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
 483  *       the child is up by PID but not IPC end-point (possibly starting).
 484  * \note This function doesn't modify any of \p child members but \c pid,
 485  *       and is not actively toying with processes as such but invoking
 486  *       \c stop_child in one particular case (there's for some reason
 487  *       a different authentic holder of the IPC end-point).
 488  */
 489 static int
 490 child_liveness(pcmk_child_t *child)
     /* [previous][next][first][last][top][bottom][index][help] */
 491 {
 492     uid_t cl_uid = 0;
 493     gid_t cl_gid = 0;
 494     const uid_t root_uid = 0;
 495     const gid_t root_gid = 0;
 496     const uid_t *ref_uid;
 497     const gid_t *ref_gid;
 498     int rc = pcmk_rc_ipc_unresponsive;
 499     pid_t ipc_pid = 0;
 500 
 501     if (child->endpoint == NULL
 502             && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
 503         crm_err("Cannot track child %s for missing both API end-point and PID",
 504                 child->name);
 505         rc = EINVAL; // Misuse of function when child is not trackable
 506 
 507     } else if (child->endpoint != NULL) {
 508         int legacy_rc = pcmk_ok;
 509 
 510         if (child->uid == NULL) {
 511             ref_uid = &root_uid;
 512             ref_gid = &root_gid;
 513         } else {
 514             ref_uid = &cl_uid;
 515             ref_gid = &cl_gid;
 516             legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
 517         }
 518 
 519         if (legacy_rc < 0) {
 520             rc = pcmk_legacy2rc(legacy_rc);
 521             crm_err("Could not find user and group IDs for user %s: %s "
 522                     CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
 523         } else {
 524             rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
 525                                                        *ref_uid, *ref_gid,
 526                                                        &ipc_pid);
 527             if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
 528                 if (child->pid <= 0) {
 529                     /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
 530                      * initializes a new child. If rc is
 531                      * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
 532                      * investigate further.
 533                      */
 534                     child->pid = ipc_pid;
 535                 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
 536                     /* An unexpected (but authorized) process is responding to
 537                      * IPC. Investigate further.
 538                      */
 539                     rc = pcmk_rc_ipc_unresponsive;
 540                 }
 541             }
 542         }
 543     }
 544 
 545     if (rc == pcmk_rc_ipc_unresponsive) {
 546         /* If we get here, a child without IPC is being tracked, no IPC liveness
 547          * has been detected, or IPC liveness has been detected with an
 548          * unexpected (but authorized) process. This is safe on FreeBSD since
 549          * the only change possible from a proper child's PID into "special" PID
 550          * of 1 behind more loosely related process.
 551          */
 552         int ret = pcmk__pid_active(child->pid, child->name);
 553 
 554         if (ipc_pid && ((ret != pcmk_rc_ok)
 555                         || ipc_pid == PCMK__SPECIAL_PID
 556                         || (pcmk__pid_active(ipc_pid,
 557                                              child->name) == pcmk_rc_ok))) {
 558             /* An unexpected (but authorized) process was detected at the IPC
 559              * endpoint, and either it is active, or the child we're tracking is
 560              * not.
 561              */
 562 
 563             if (ret == pcmk_rc_ok) {
 564                 /* The child we're tracking is active. Kill it, and adopt the
 565                  * detected process. This assumes that our children don't fork
 566                  * (thus getting a different PID owning the IPC), but rather the
 567                  * tracking got out of sync because of some means external to
 568                  * Pacemaker, and adopting the detected process is better than
 569                  * killing it and possibly having to spawn a new child.
 570                  */
 571                 /* not possessing IPC, afterall (what about corosync CPG?) */
 572                 stop_child(child, SIGKILL);
 573             }
 574             rc = pcmk_rc_ok;
 575             child->pid = ipc_pid;
 576         } else if (ret == pcmk_rc_ok) {
 577             // Our tracked child's PID was found active, but not its IPC
 578             rc = pcmk_rc_ipc_pid_only;
 579         } else if ((child->pid == 0) && (ret == EINVAL)) {
 580             // FreeBSD can return EINVAL
 581             rc = pcmk_rc_ipc_unresponsive;
 582         } else {
 583             switch (ret) {
 584                 case EACCES:
 585                     rc = pcmk_rc_ipc_unauthorized;
 586                     break;
 587                 case ESRCH:
 588                     rc = pcmk_rc_ipc_unresponsive;
 589                     break;
 590                 default:
 591                     rc = ret;
 592                     break;
 593             }
 594         }
 595     }
 596     return rc;
 597 }
 598 
 599 /*!
 600  * \internal
 601  * \brief Initial one-off check of the pre-existing "child" processes
 602  *
 603  * With "child" process, we mean the subdaemon that defines an API end-point
 604  * (all of them do as of the comment) -- the possible complement is skipped
 605  * as it is deemed it has no such shared resources to cause conflicts about,
 606  * hence it can presumably be started anew without hesitation.
 607  * If that won't hold true in the future, the concept of a shared resource
 608  * will have to be generalized beyond the API end-point.
 609  *
 610  * For boundary cases that the "child" is still starting (IPC end-point is yet
 611  * to be witnessed), or more rarely (practically FreeBSD only), when there's
 612  * a pre-existing "untrackable" authentic process, we give the situation some
 613  * time to possibly unfold in the right direction, meaning that said socket
 614  * will appear or the unattainable process will disappear per the observable
 615  * IPC, respectively.
 616  *
 617  * \return Standard Pacemaker return code
 618  *
 619  * \note Since this gets run at the very start, \c respawn_count fields
 620  *       for particular children get temporarily overloaded with "rounds
 621  *       of waiting" tracking, restored once we are about to finish with
 622  *       success (i.e. returning value >=0) and will remain unrestored
 623  *       otherwise.  One way to suppress liveness detection logic for
 624  *       particular child is to set the said value to a negative number.
 625  */
 626 #define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
 627 int
 628 find_and_track_existing_processes(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 629 {
 630     bool tracking = false;
 631     bool wait_in_progress;
 632     int rc;
 633     size_t i, rounds;
 634 
 635     for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
 636         wait_in_progress = false;
 637         for (i = 0; i < SIZEOF(pcmk_children); i++) {
 638 
 639             if ((pcmk_children[i].endpoint == NULL)
 640                 || (pcmk_children[i].respawn_count < 0)) {
 641                 continue;
 642             }
 643 
 644             rc = child_liveness(&pcmk_children[i]);
 645             if (rc == pcmk_rc_ipc_unresponsive) {
 646                 /* As a speculation, don't give up if there are more rounds to
 647                  * come for other reasons, but don't artificially wait just
 648                  * because of this, since we would preferably start ASAP.
 649                  */
 650                 continue;
 651             }
 652 
 653             pcmk_children[i].respawn_count = rounds;
 654             switch (rc) {
 655                 case pcmk_rc_ok:
 656                     if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
 657                         if (crm_is_true(getenv("PCMK_fail_fast"))) {
 658                             crm_crit("Cannot reliably track pre-existing"
 659                                      " authentic process behind %s IPC on this"
 660                                      " platform and PCMK_fail_fast requested",
 661                                      pcmk_children[i].endpoint);
 662                             return EOPNOTSUPP;
 663                         } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 664                             crm_notice("Assuming pre-existing authentic, though"
 665                                        " on this platform untrackable, process"
 666                                        " behind %s IPC is stable (was in %d"
 667                                        " previous samples) so rather than"
 668                                        " bailing out (PCMK_fail_fast not"
 669                                        " requested), we just switch to a less"
 670                                        " optimal IPC liveness monitoring"
 671                                        " (not very suitable for heavy load)",
 672                                        pcmk_children[i].name, WAIT_TRIES - 1);
 673                             crm_warn("The process behind %s IPC cannot be"
 674                                      " terminated, so the overall shutdown"
 675                                      " will get delayed implicitly (%ld s),"
 676                                      " which serves as a graceful period for"
 677                                      " its native termination if it vitally"
 678                                      " depends on some other daemons going"
 679                                      " down in a controlled way already",
 680                                      pcmk_children[i].name,
 681                                      (long) SHUTDOWN_ESCALATION_PERIOD);
 682                         } else {
 683                             wait_in_progress = true;
 684                             crm_warn("Cannot reliably track pre-existing"
 685                                      " authentic process behind %s IPC on this"
 686                                      " platform, can still disappear in %d"
 687                                      " attempt(s)", pcmk_children[i].endpoint,
 688                                      WAIT_TRIES - pcmk_children[i].respawn_count);
 689                             continue;
 690                         }
 691                     }
 692                     crm_notice("Tracking existing %s process (pid=%lld)",
 693                                pcmk_children[i].name,
 694                                (long long) PCMK__SPECIAL_PID_AS_0(
 695                                                pcmk_children[i].pid));
 696                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
 697                     pcmk_children[i].active_before_startup = TRUE;
 698                     tracking = true;
 699                     break;
 700                 case pcmk_rc_ipc_pid_only:
 701                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 702                         crm_crit("%s IPC end-point for existing authentic"
 703                                  " process %lld did not (re)appear",
 704                                  pcmk_children[i].endpoint,
 705                                  (long long) PCMK__SPECIAL_PID_AS_0(
 706                                                  pcmk_children[i].pid));
 707                         return rc;
 708                     }
 709                     wait_in_progress = true;
 710                     crm_warn("Cannot find %s IPC end-point for existing"
 711                              " authentic process %lld, can still (re)appear"
 712                              " in %d attempts (?)",
 713                              pcmk_children[i].endpoint,
 714                              (long long) PCMK__SPECIAL_PID_AS_0(
 715                                              pcmk_children[i].pid),
 716                              WAIT_TRIES - pcmk_children[i].respawn_count);
 717                     continue;
 718                 default:
 719                     crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
 720                              pcmk_children[i].name, pcmk_rc_str(rc), rc);
 721                     return rc;
 722             }
 723         }
 724         if (!wait_in_progress) {
 725             break;
 726         }
 727         pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
 728     }
 729     for (i = 0; i < SIZEOF(pcmk_children); i++) {
 730         pcmk_children[i].respawn_count = 0;  /* restore pristine state */
 731     }
 732 
 733     if (tracking) {
 734         g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
 735                               check_active_before_startup_processes, NULL);
 736     }
 737     return pcmk_rc_ok;
 738 }
 739 
 740 gboolean
 741 init_children_processes(void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 742 {
 743     int start_seq = 1, lpc = 0;
 744     static int max = SIZEOF(pcmk_children);
 745 
 746     /* start any children that have not been detected */
 747     for (start_seq = 1; start_seq < max; start_seq++) {
 748         /* don't start anything with start_seq < 1 */
 749         for (lpc = 0; lpc < max; lpc++) {
 750             if (pcmk_children[lpc].pid != 0) {
 751                 /* we are already tracking it */
 752                 continue;
 753             }
 754 
 755             if (start_seq == pcmk_children[lpc].start_seq) {
 756                 start_child(&(pcmk_children[lpc]));
 757             }
 758         }
 759     }
 760 
 761     /* From this point on, any daemons being started will be due to
 762      * respawning rather than node start.
 763      *
 764      * This may be useful for the daemons to know
 765      */
 766     setenv("PCMK_respawned", "true", 1);
 767     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
 768     return TRUE;
 769 }
 770 
 771 void
 772 pcmk_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 773 {
 774     if (shutdown_trigger == NULL) {
 775         shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
 776     }
 777     mainloop_set_trigger(shutdown_trigger);
 778 }
 779 
 780 static gboolean
 781 stop_child(pcmk_child_t * child, int signal)
     /* [previous][next][first][last][top][bottom][index][help] */
 782 {
 783     if (signal == 0) {
 784         signal = SIGTERM;
 785     }
 786 
 787     /* why to skip PID of 1?
 788        - FreeBSD ~ how untrackable process behind IPC is masqueraded as
 789        - elsewhere: how "init" task is designated; in particular, in systemd
 790          arrangement of socket-based activation, this is pretty real */
 791     if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
 792         crm_debug("Nothing to do for child \"%s\" (process %lld)",
 793                   child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
 794         return TRUE;
 795     }
 796 
 797     if (child->pid <= 0) {
 798         crm_trace("Client %s not running", child->name);
 799         return TRUE;
 800     }
 801 
 802     errno = 0;
 803     if (kill(child->pid, signal) == 0) {
 804         crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
 805                    child->name, signal, (long long) child->pid);
 806 
 807     } else {
 808         crm_err("Could not stop %s (process %lld) with signal %d: %s",
 809                 child->name, (long long) child->pid, signal, strerror(errno));
 810     }
 811 
 812     return TRUE;
 813 }
 814 

/* [previous][next][first][last][top][bottom][index][help] */