root/daemons/pacemakerd/pcmkd_subdaemons.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. pcmkd_cluster_connected
  2. check_next_subdaemon
  3. escalate_shutdown
  4. pcmk_child_exit
  5. pcmk_process_exit
  6. pcmk_shutdown_worker
  7. start_child
  8. child_liveness
  9. find_and_track_existing_processes
  10. init_children_processes
  11. pcmk_shutdown
  12. restart_cluster_subdaemons
  13. stop_child

   1 /*
   2  * Copyright 2010-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #if SUPPORT_COROSYNC
  14 #include "pcmkd_corosync.h"
  15 #endif
  16 
  17 #include <errno.h>
  18 #include <grp.h>
  19 #include <signal.h>
  20 #include <stdbool.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <sys/types.h>
  25 #include <time.h>
  26 #include <unistd.h>
  27 
  28 #include <crm/cluster.h>
  29 #include <crm/common/xml.h>
  30 
  31 enum child_daemon_flags {
  32     child_none                  = 0,
  33     child_respawn               = 1 << 0,
  34     child_needs_cluster         = 1 << 1,
  35     child_needs_retry           = 1 << 2,
  36     child_active_before_startup = 1 << 3,
  37 };
  38 
  39 typedef struct pcmk_child_s {
  40     pid_t pid;
  41     int respawn_count;
  42     const char *name;
  43     const char *uid;
  44     const char *command;
  45     const char *endpoint;  /* IPC server name */
  46     int check_count;
  47     uint32_t flags;
  48 } pcmk_child_t;
  49 
  50 #define PCMK_PROCESS_CHECK_INTERVAL 1
  51 #define PCMK_PROCESS_CHECK_RETRIES  5
  52 #define SHUTDOWN_ESCALATION_PERIOD  180000  /* 3m */
  53 
  54 /* Index into the array below */
  55 #define PCMK_CHILD_CONTROLD  5
  56 
  57 static pcmk_child_t pcmk_children[] = {
  58     {
  59         0, 0, "pacemaker-based", CRM_DAEMON_USER,
  60         CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
  61         0, child_respawn | child_needs_cluster
  62     },
  63     {
  64         0, 0, "pacemaker-fenced", NULL,
  65         CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
  66         0, child_respawn | child_needs_cluster
  67     },
  68     {
  69         0, 0, "pacemaker-execd", NULL,
  70         CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
  71         0, child_respawn
  72     },
  73     {
  74         0, 0, "pacemaker-attrd", CRM_DAEMON_USER,
  75         CRM_DAEMON_DIR "/pacemaker-attrd", PCMK__VALUE_ATTRD,
  76         0, child_respawn | child_needs_cluster
  77     },
  78     {
  79         0, 0, "pacemaker-schedulerd", CRM_DAEMON_USER,
  80         CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
  81         0, child_respawn
  82     },
  83     {
  84         0, 0, "pacemaker-controld", CRM_DAEMON_USER,
  85         CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
  86         0, child_respawn | child_needs_cluster
  87     },
  88 };
  89 
  90 static char *opts_default[] = { NULL, NULL };
  91 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
  92 
  93 crm_trigger_t *shutdown_trigger = NULL;
  94 crm_trigger_t *startup_trigger = NULL;
  95 time_t subdaemon_check_progress = 0;
  96 
  97 // Whether we need root group access to talk to cluster layer
  98 static bool need_root_group = true;
  99 
 100 /* When contacted via pacemakerd-api by a client having sbd in
 101  * the name we assume it is sbd-daemon which wants to know
 102  * if pacemakerd shutdown gracefully.
 103  * Thus when everything is shutdown properly pacemakerd
 104  * waits till it has reported the graceful completion of
 105  * shutdown to sbd and just when sbd-client closes the
 106  * connection we can assume that the report has arrived
 107  * properly so that pacemakerd can finally exit.
 108  * Following two variables are used to track that handshake.
 109  */
 110 unsigned int shutdown_complete_state_reported_to = 0;
 111 gboolean shutdown_complete_state_reported_client_closed = FALSE;
 112 
 113 /* state we report when asked via pacemakerd-api status-ping */
 114 const char *pacemakerd_state = PCMK__VALUE_INIT;
 115 gboolean running_with_sbd = FALSE; /* local copy */
 116 
 117 GMainLoop *mainloop = NULL;
 118 
 119 static gboolean fatal_error = FALSE;
 120 
 121 static int child_liveness(pcmk_child_t *child);
 122 static gboolean escalate_shutdown(gpointer data);
 123 static int start_child(pcmk_child_t * child);
 124 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
 125 static void pcmk_process_exit(pcmk_child_t * child);
 126 static gboolean pcmk_shutdown_worker(gpointer user_data);
 127 static gboolean stop_child(pcmk_child_t * child, int signal);
 128 
 129 static bool
 130 pcmkd_cluster_connected(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 131 {
 132 #if SUPPORT_COROSYNC
 133     return pcmkd_corosync_connected();
 134 #else
 135     return true;
 136 #endif
 137 }
 138 
 139 static gboolean
 140 check_next_subdaemon(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142     static int next_child = 0;
 143 
 144     pcmk_child_t *child = &(pcmk_children[next_child]);
 145     const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
 146     int rc = child_liveness(child);
 147 
 148     crm_trace("Checked %s[%lld]: %s (%d)",
 149               child->name, pid, pcmk_rc_str(rc), rc);
 150 
 151     switch (rc) {
 152         case pcmk_rc_ok:
 153             child->check_count = 0;
 154             subdaemon_check_progress = time(NULL);
 155             break;
 156 
 157         case pcmk_rc_ipc_pid_only: // Child was previously OK
 158             if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
 159                 crm_crit("%s[%lld] is unresponsive to IPC after %d attempt%s "
 160                          "and will now be killed",
 161                          child->name, pid, child->check_count,
 162                          pcmk__plural_s(child->check_count));
 163                 stop_child(child, SIGKILL);
 164                 if (pcmk_is_set(child->flags, child_respawn)) {
 165                     // Respawn limit hasn't been reached, so retry another round
 166                     child->check_count = 0;
 167                 }
 168             } else {
 169                 crm_notice("%s[%lld] is unresponsive to IPC after %d attempt%s",
 170                            child->name, pid, child->check_count,
 171                            pcmk__plural_s(child->check_count));
 172                 if (pcmk_is_set(child->flags, child_respawn)) {
 173                     /* as long as the respawn-limit isn't reached
 174                        and we haven't run out of connect retries
 175                        we account this as progress we are willing
 176                        to tell to sbd
 177                      */
 178                     subdaemon_check_progress = time(NULL);
 179                 }
 180             }
 181             /* go to the next child and see if
 182                we can make progress there
 183              */
 184             break;
 185         case pcmk_rc_ipc_unresponsive:
 186             if (!pcmk_is_set(child->flags, child_respawn)) {
 187                 /* if a subdaemon is down and we don't want it
 188                    to be restarted this is a success during
 189                    shutdown. if it isn't restarted anymore
 190                    due to MAX_RESPAWN it is
 191                    rather no success.
 192                  */
 193                 if (child->respawn_count <= MAX_RESPAWN) {
 194                     subdaemon_check_progress = time(NULL);
 195                 }
 196             }
 197             if (!pcmk_is_set(child->flags, child_active_before_startup)) {
 198                 crm_trace("%s[%lld] terminated (relying on SIGCHLD handler)",
 199                           child->name, pid);
 200                 break;
 201             }
 202             if (pcmk_is_set(child->flags, child_respawn)) {
 203                 crm_err("%s[%lld] terminated", child->name, pid);
 204             } else {
 205                 /* orderly shutdown */
 206                 crm_notice("%s[%lld] terminated", child->name, pid);
 207             }
 208             pcmk_process_exit(child);
 209             break;
 210         default:
 211             crm_exit(CRM_EX_FATAL);
 212             break;  /* static analysis/noreturn */
 213     }
 214 
 215     if (++next_child >= PCMK__NELEM(pcmk_children)) {
 216         next_child = 0;
 217     }
 218 
 219     return G_SOURCE_CONTINUE;
 220 }
 221 
 222 static gboolean
 223 escalate_shutdown(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 224 {
 225     pcmk_child_t *child = data;
 226 
 227     if (child->pid == PCMK__SPECIAL_PID) {
 228         pcmk_process_exit(child);
 229 
 230     } else if (child->pid != 0) {
 231         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
 232         crm_err("Child %s not terminating in a timely manner, forcing", child->name);
 233         stop_child(child, SIGSEGV);
 234     }
 235     return FALSE;
 236 }
 237 
 238 static void
 239 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 240 {
 241     pcmk_child_t *child = mainloop_child_userdata(p);
 242     const char *name = mainloop_child_name(p);
 243 
 244     if (signo) {
 245         do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
 246                    "%s[%d] terminated with signal %d (%s)%s",
 247                    name, pid, signo, strsignal(signo),
 248                    (core? " and dumped core" : ""));
 249 
 250     } else {
 251         switch(exitcode) {
 252             case CRM_EX_OK:
 253                 crm_info("%s[%d] exited with status %d (%s)",
 254                          name, pid, exitcode, crm_exit_str(exitcode));
 255                 break;
 256 
 257             case CRM_EX_FATAL:
 258                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
 259                          name, pid);
 260                 child->flags &= ~child_respawn;
 261                 fatal_error = TRUE;
 262                 pcmk_shutdown(SIGTERM);
 263                 break;
 264 
 265             case CRM_EX_PANIC:
 266                 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
 267                 child->flags &= ~child_respawn;
 268                 fatal_error = TRUE;
 269                 pcmk__panic(__func__);
 270                 pcmk_shutdown(SIGTERM);
 271                 break;
 272 
 273             default:
 274                 crm_err("%s[%d] exited with status %d (%s)",
 275                         name, pid, exitcode, crm_exit_str(exitcode));
 276                 break;
 277         }
 278     }
 279 
 280     pcmk_process_exit(child);
 281 }
 282 
 283 static void
 284 pcmk_process_exit(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 285 {
 286     child->pid = 0;
 287     child->flags &= ~child_active_before_startup;
 288     child->check_count = 0;
 289 
 290     child->respawn_count += 1;
 291     if (child->respawn_count > MAX_RESPAWN) {
 292         crm_err("Child respawn count exceeded by %s", child->name);
 293         child->flags &= ~child_respawn;
 294     }
 295 
 296     if (shutdown_trigger) {
 297         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
 298         mainloop_set_trigger(shutdown_trigger);
 299 
 300     } else if (!pcmk_is_set(child->flags, child_respawn)) {
 301         /* nothing to do */
 302 
 303     } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
 304         crm_err("Rebooting system because of %s", child->name);
 305         pcmk__panic(__func__);
 306 
 307     } else if (child_liveness(child) == pcmk_rc_ok) {
 308         crm_warn("One-off suppressing strict respawning of a child process %s,"
 309                  " appears alright per %s IPC end-point",
 310                  child->name, child->endpoint);
 311 
 312     } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
 313         crm_notice("Not respawning %s subdaemon until cluster returns",
 314                    child->name);
 315         child->flags |= child_needs_retry;
 316 
 317     } else {
 318         crm_notice("Respawning %s subdaemon after unexpected exit",
 319                    child->name);
 320         start_child(child);
 321     }
 322 }
 323 
 324 static gboolean
 325 pcmk_shutdown_worker(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 326 {
 327     static int phase = PCMK__NELEM(pcmk_children) - 1;
 328     static time_t next_log = 0;
 329 
 330     if (phase == PCMK__NELEM(pcmk_children) - 1) {
 331         crm_notice("Shutting down Pacemaker");
 332         pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
 333     }
 334 
 335     for (; phase >= 0; phase--) {
 336         pcmk_child_t *child = &(pcmk_children[phase]);
 337 
 338         if (child->pid != 0) {
 339             time_t now = time(NULL);
 340 
 341             if (pcmk_is_set(child->flags, child_respawn)) {
 342                 if (child->pid == PCMK__SPECIAL_PID) {
 343                     crm_warn("The process behind %s IPC cannot be"
 344                              " terminated, so either wait the graceful"
 345                              " period of %ld s for its native termination"
 346                              " if it vitally depends on some other daemons"
 347                              " going down in a controlled way already,"
 348                              " or locate and kill the correct %s process"
 349                              " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1"
 350                              " to avoid this altogether next time around",
 351                              child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
 352                              child->command);
 353                 }
 354                 next_log = now + 30;
 355                 child->flags &= ~child_respawn;
 356                 stop_child(child, SIGTERM);
 357                 if (phase < PCMK_CHILD_CONTROLD) {
 358                     g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
 359                                   escalate_shutdown, child);
 360                 }
 361 
 362             } else if (now >= next_log) {
 363                 next_log = now + 30;
 364                 crm_notice("Still waiting for %s to terminate "
 365                            CRM_XS " pid=%lld",
 366                            child->name, (long long) child->pid);
 367             }
 368             return TRUE;
 369         }
 370 
 371         /* cleanup */
 372         crm_debug("%s confirmed stopped", child->name);
 373         child->pid = 0;
 374     }
 375 
 376     crm_notice("Shutdown complete");
 377     pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
 378     if (!fatal_error && running_with_sbd &&
 379         pcmk__get_sbd_sync_resource_startup() &&
 380         !shutdown_complete_state_reported_client_closed) {
 381         crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
 382         return TRUE;
 383     }
 384 
 385     // @COMPAT Drop shutdown delay at 3.0.0
 386     {
 387         const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
 388         if(delay) {
 389             long long delay_ms = crm_get_msec(delay);
 390 
 391             sync();
 392             if (delay_ms > 0) {
 393                 pcmk__sleep_ms((unsigned int) QB_MIN(delay_ms, UINT_MAX));
 394             }
 395         }
 396     }
 397 
 398     g_main_loop_quit(mainloop);
 399 
 400     if (fatal_error) {
 401         crm_notice("Shutting down and staying down after fatal error");
 402 #ifdef SUPPORT_COROSYNC
 403         pcmkd_shutdown_corosync();
 404 #endif
 405         crm_exit(CRM_EX_FATAL);
 406     }
 407 
 408     return TRUE;
 409 }
 410 
 411 /* TODO once libqb is taught to juggle with IPC end-points carried over as
 412         bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
 413         it shall hand over these descriptors here if/once they are successfully
 414         pre-opened in (presumably) child_liveness(), to avoid any remaining
 415         room for races */
 416  // \return Standard Pacemaker return code
 417 static int
 418 start_child(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 419 {
 420     uid_t uid = 0;
 421     gid_t gid = 0;
 422     gboolean use_valgrind = FALSE;
 423     gboolean use_callgrind = FALSE;
 424     const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
 425     const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
 426 
 427     child->flags &= ~child_active_before_startup;
 428     child->check_count = 0;
 429 
 430     if (child->command == NULL) {
 431         crm_info("Nothing to do for child \"%s\"", child->name);
 432         return pcmk_rc_ok;
 433     }
 434 
 435     if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
 436         use_callgrind = TRUE;
 437         use_valgrind = TRUE;
 438 
 439     } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
 440         use_callgrind = TRUE;
 441         use_valgrind = TRUE;
 442 
 443     } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
 444         use_valgrind = TRUE;
 445 
 446     } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
 447         use_valgrind = TRUE;
 448     }
 449 
 450     if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
 451         crm_warn("Cannot enable valgrind for %s:"
 452                  " The location of the valgrind binary is unknown", child->name);
 453         use_valgrind = FALSE;
 454     }
 455 
 456     if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) {
 457         crm_err("Invalid user (%s) for subdaemon %s: not found",
 458                 child->uid, child->name);
 459         return EACCES;
 460     }
 461 
 462     child->pid = fork();
 463     pcmk__assert(child->pid != -1);
 464 
 465     if (child->pid > 0) {
 466         /* parent */
 467         mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
 468 
 469         crm_info("Forked process %lld using user %lu (%s) and group %lu "
 470                  "for subdaemon %s%s",
 471                  (long long) child->pid, (unsigned long) uid,
 472                  pcmk__s(child->uid, "root"), (unsigned long) gid, child->name,
 473                  use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
 474         return pcmk_rc_ok;
 475 
 476     } else {
 477         /* Start a new session */
 478         (void)setsid();
 479 
 480         /* Setup the two alternate arg arrays */
 481         opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN);
 482         if (use_callgrind) {
 483             opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
 484             opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
 485                                             CRM_STATE_DIR "/callgrind.out.%p");
 486             opts_vgrind[3] = pcmk__str_copy(child->command);
 487             opts_vgrind[4] = NULL;
 488         } else {
 489             opts_vgrind[1] = pcmk__str_copy(child->command);
 490             opts_vgrind[2] = NULL;
 491             opts_vgrind[3] = NULL;
 492             opts_vgrind[4] = NULL;
 493         }
 494         opts_default[0] = pcmk__str_copy(child->command);
 495 
 496         if(gid) {
 497             // Drop root group access if not needed
 498             if (!need_root_group && (setgid(gid) < 0)) {
 499                 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
 500             }
 501 
 502             /* Initialize supplementary groups to only those always granted to
 503              * the user, plus haclient (so we can access IPC).
 504              */
 505             if (initgroups(child->uid, gid) < 0) {
 506                 crm_err("Cannot initialize groups for %s: %s (%d)",
 507                         child->uid, pcmk_rc_str(errno), errno);
 508             }
 509         }
 510 
 511         if (uid && setuid(uid) < 0) {
 512             crm_warn("Could not set user to %s (id %d): %s",
 513                      child->uid, uid, strerror(errno));
 514         }
 515 
 516         pcmk__close_fds_in_child(true);
 517 
 518         pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
 519         pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
 520         pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
 521 
 522         if (use_valgrind) {
 523             (void)execvp(VALGRIND_BIN, opts_vgrind);
 524         } else {
 525             (void)execvp(child->command, opts_default);
 526         }
 527         crm_crit("Could not execute %s: %s", child->command, strerror(errno));
 528         crm_exit(CRM_EX_FATAL);
 529     }
 530     return pcmk_rc_ok;          /* never reached */
 531 }
 532 
 533 /*!
 534  * \internal
 535  * \brief Check the liveness of the child based on IPC name and PID if tracked
 536  *
 537  * \param[in,out] child  Child tracked data
 538  *
 539  * \return Standard Pacemaker return code
 540  *
 541  * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 542  *       indicating that no trace of IPC liveness was detected,
 543  *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
 544  *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
 545  *       the child is up by PID but not IPC end-point (possibly starting).
 546  * \note This function doesn't modify any of \p child members but \c pid,
 547  *       and is not actively toying with processes as such but invoking
 548  *       \c stop_child in one particular case (there's for some reason
 549  *       a different authentic holder of the IPC end-point).
 550  */
 551 static int
 552 child_liveness(pcmk_child_t *child)
     /* [previous][next][first][last][top][bottom][index][help] */
 553 {
 554     uid_t cl_uid = 0;
 555     gid_t cl_gid = 0;
 556     const uid_t root_uid = 0;
 557     const gid_t root_gid = 0;
 558     const uid_t *ref_uid;
 559     const gid_t *ref_gid;
 560     int rc = pcmk_rc_ipc_unresponsive;
 561     pid_t ipc_pid = 0;
 562 
 563     if (child->endpoint == NULL
 564             && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
 565         crm_err("Cannot track child %s for missing both API end-point and PID",
 566                 child->name);
 567         rc = EINVAL; // Misuse of function when child is not trackable
 568 
 569     } else if (child->endpoint != NULL) {
 570         int legacy_rc = pcmk_ok;
 571 
 572         if (child->uid == NULL) {
 573             ref_uid = &root_uid;
 574             ref_gid = &root_gid;
 575         } else {
 576             ref_uid = &cl_uid;
 577             ref_gid = &cl_gid;
 578             legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
 579         }
 580 
 581         if (legacy_rc < 0) {
 582             rc = pcmk_legacy2rc(legacy_rc);
 583             crm_err("Could not find user and group IDs for user %s: %s "
 584                     CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
 585         } else {
 586             rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
 587                                                        *ref_uid, *ref_gid,
 588                                                        &ipc_pid);
 589             if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
 590                 if (child->pid <= 0) {
 591                     /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
 592                      * initializes a new child. If rc is
 593                      * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
 594                      * investigate further.
 595                      */
 596                     child->pid = ipc_pid;
 597                 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
 598                     /* An unexpected (but authorized) process is responding to
 599                      * IPC. Investigate further.
 600                      */
 601                     rc = pcmk_rc_ipc_unresponsive;
 602                 }
 603             }
 604         }
 605     }
 606 
 607     if (rc == pcmk_rc_ipc_unresponsive) {
 608         /* If we get here, a child without IPC is being tracked, no IPC liveness
 609          * has been detected, or IPC liveness has been detected with an
 610          * unexpected (but authorized) process. This is safe on FreeBSD since
 611          * the only change possible from a proper child's PID into "special" PID
 612          * of 1 behind more loosely related process.
 613          */
 614         int ret = pcmk__pid_active(child->pid, child->name);
 615 
 616         if (ipc_pid && ((ret != pcmk_rc_ok)
 617                         || ipc_pid == PCMK__SPECIAL_PID
 618                         || (pcmk__pid_active(ipc_pid,
 619                                              child->name) == pcmk_rc_ok))) {
 620             /* An unexpected (but authorized) process was detected at the IPC
 621              * endpoint, and either it is active, or the child we're tracking is
 622              * not.
 623              */
 624 
 625             if (ret == pcmk_rc_ok) {
 626                 /* The child we're tracking is active. Kill it, and adopt the
 627                  * detected process. This assumes that our children don't fork
 628                  * (thus getting a different PID owning the IPC), but rather the
 629                  * tracking got out of sync because of some means external to
 630                  * Pacemaker, and adopting the detected process is better than
 631                  * killing it and possibly having to spawn a new child.
 632                  */
 633                 /* not possessing IPC, afterall (what about corosync CPG?) */
 634                 stop_child(child, SIGKILL);
 635             }
 636             rc = pcmk_rc_ok;
 637             child->pid = ipc_pid;
 638         } else if (ret == pcmk_rc_ok) {
 639             // Our tracked child's PID was found active, but not its IPC
 640             rc = pcmk_rc_ipc_pid_only;
 641         } else if ((child->pid == 0) && (ret == EINVAL)) {
 642             // FreeBSD can return EINVAL
 643             rc = pcmk_rc_ipc_unresponsive;
 644         } else {
 645             switch (ret) {
 646                 case EACCES:
 647                     rc = pcmk_rc_ipc_unauthorized;
 648                     break;
 649                 case ESRCH:
 650                     rc = pcmk_rc_ipc_unresponsive;
 651                     break;
 652                 default:
 653                     rc = ret;
 654                     break;
 655             }
 656         }
 657     }
 658     return rc;
 659 }
 660 
 661 /*!
 662  * \internal
 663  * \brief Initial one-off check of the pre-existing "child" processes
 664  *
 665  * With "child" process, we mean the subdaemon that defines an API end-point
 666  * (all of them do as of the comment) -- the possible complement is skipped
 667  * as it is deemed it has no such shared resources to cause conflicts about,
 668  * hence it can presumably be started anew without hesitation.
 669  * If that won't hold true in the future, the concept of a shared resource
 670  * will have to be generalized beyond the API end-point.
 671  *
 672  * For boundary cases that the "child" is still starting (IPC end-point is yet
 673  * to be witnessed), or more rarely (practically FreeBSD only), when there's
 674  * a pre-existing "untrackable" authentic process, we give the situation some
 675  * time to possibly unfold in the right direction, meaning that said socket
 676  * will appear or the unattainable process will disappear per the observable
 677  * IPC, respectively.
 678  *
 679  * \return Standard Pacemaker return code
 680  *
 681  * \note Since this gets run at the very start, \c respawn_count fields
 682  *       for particular children get temporarily overloaded with "rounds
 683  *       of waiting" tracking, restored once we are about to finish with
 684  *       success (i.e. returning value >=0) and will remain unrestored
 685  *       otherwise.  One way to suppress liveness detection logic for
 686  *       particular child is to set the said value to a negative number.
 687  */
 688 #define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
 689 int
 690 find_and_track_existing_processes(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 691 {
 692     bool wait_in_progress;
 693     int rc;
 694     size_t i, rounds;
 695 
 696     for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
 697         wait_in_progress = false;
 698         for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 699 
 700             if ((pcmk_children[i].endpoint == NULL)
 701                 || (pcmk_children[i].respawn_count < 0)) {
 702                 continue;
 703             }
 704 
 705             rc = child_liveness(&pcmk_children[i]);
 706             if (rc == pcmk_rc_ipc_unresponsive) {
 707                 /* As a speculation, don't give up if there are more rounds to
 708                  * come for other reasons, but don't artificially wait just
 709                  * because of this, since we would preferably start ASAP.
 710                  */
 711                 continue;
 712             }
 713 
 714             // @TODO Functionize more of this to reduce nesting
 715             pcmk_children[i].respawn_count = rounds;
 716             switch (rc) {
 717                 case pcmk_rc_ok:
 718                     if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
 719                         if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
 720                             crm_crit("Cannot reliably track pre-existing"
 721                                      " authentic process behind %s IPC on this"
 722                                      " platform and PCMK_" PCMK__ENV_FAIL_FAST
 723                                      " requested",
 724                                      pcmk_children[i].endpoint);
 725                             return EOPNOTSUPP;
 726                         } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 727                             crm_notice("Assuming pre-existing authentic, though"
 728                                        " on this platform untrackable, process"
 729                                        " behind %s IPC is stable (was in %d"
 730                                        " previous samples) so rather than"
 731                                        " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
 732                                        " not requested), we just switch to a"
 733                                        " less optimal IPC liveness monitoring"
 734                                        " (not very suitable for heavy load)",
 735                                        pcmk_children[i].name, WAIT_TRIES - 1);
 736                             crm_warn("The process behind %s IPC cannot be"
 737                                      " terminated, so the overall shutdown"
 738                                      " will get delayed implicitly (%ld s),"
 739                                      " which serves as a graceful period for"
 740                                      " its native termination if it vitally"
 741                                      " depends on some other daemons going"
 742                                      " down in a controlled way already",
 743                                      pcmk_children[i].name,
 744                                      (long) SHUTDOWN_ESCALATION_PERIOD);
 745                         } else {
 746                             wait_in_progress = true;
 747                             crm_warn("Cannot reliably track pre-existing"
 748                                      " authentic process behind %s IPC on this"
 749                                      " platform, can still disappear in %d"
 750                                      " attempt(s)", pcmk_children[i].endpoint,
 751                                      WAIT_TRIES - pcmk_children[i].respawn_count);
 752                             continue;
 753                         }
 754                     }
 755                     crm_notice("Tracking existing %s process (pid=%lld)",
 756                                pcmk_children[i].name,
 757                                (long long) PCMK__SPECIAL_PID_AS_0(
 758                                                pcmk_children[i].pid));
 759                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
 760                     pcmk_children[i].flags |= child_active_before_startup;
 761                     break;
 762                 case pcmk_rc_ipc_pid_only:
 763                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 764                         crm_crit("%s IPC end-point for existing authentic"
 765                                  " process %lld did not (re)appear",
 766                                  pcmk_children[i].endpoint,
 767                                  (long long) PCMK__SPECIAL_PID_AS_0(
 768                                                  pcmk_children[i].pid));
 769                         return rc;
 770                     }
 771                     wait_in_progress = true;
 772                     crm_warn("Cannot find %s IPC end-point for existing"
 773                              " authentic process %lld, can still (re)appear"
 774                              " in %d attempts (?)",
 775                              pcmk_children[i].endpoint,
 776                              (long long) PCMK__SPECIAL_PID_AS_0(
 777                                              pcmk_children[i].pid),
 778                              WAIT_TRIES - pcmk_children[i].respawn_count);
 779                     continue;
 780                 default:
 781                     crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
 782                              pcmk_children[i].name, pcmk_rc_str(rc), rc);
 783                     return rc;
 784             }
 785         }
 786         if (!wait_in_progress) {
 787             break;
 788         }
 789         pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
 790     }
 791     for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 792         pcmk_children[i].respawn_count = 0;  /* restore pristine state */
 793     }
 794 
 795     g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
 796                           NULL);
 797     return pcmk_rc_ok;
 798 }
 799 
 800 gboolean
 801 init_children_processes(void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
     /* Start every entry of pcmk_children[] that is not yet tracked (pid == 0),
      * then flag any later daemon starts as respawns and set pacemakerd_state
      * to PCMK__VALUE_RUNNING.
      *
      * user_data: not referenced in this function.
      *
      * Returns TRUE unconditionally.
      */
 802 {
 803     if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
 804         /* Corosync clusters can drop root group access, because we set
 805          * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
 806          * to corosync.
 807          */
 808         need_root_group = false;
 809     }
 810 
 811     /* start any children that have not been detected */
 812     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 813         if (pcmk_children[i].pid != 0) {
 814             /* we are already tracking it */
 815             continue;
 816         }
 817 
             /* The result of start_child() is not examined here */
 818         start_child(&(pcmk_children[i]));
 819     }
 820 
 821     /* From this point on, any daemons being started will be due to
 822      * respawning rather than node start.
 823      *
 824      * This may be useful for the daemons to know
 825      */
 826     pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
         /* pacemakerd_state is defined outside this function */
 827     pacemakerd_state = PCMK__VALUE_RUNNING;
 828     return TRUE;
 829 }
 830 
 831 void
 832 pcmk_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
     /* Request a shutdown by setting the shutdown trigger.
      *
      * The trigger is created on the first call (registered at G_PRIORITY_HIGH
      * with pcmk_shutdown_worker as its callback) and reused on later calls.
      *
      * nsig: not referenced in this function (presumably the number of the
      *       signal that prompted the shutdown -- confirm against callers).
      */
 833 {
 834     if (shutdown_trigger == NULL) {
 835         shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
 836     }
 837     mainloop_set_trigger(shutdown_trigger);
 838 }
 839 
 840 void
 841 restart_cluster_subdaemons(void)
     /* [previous][next][first][last][top][bottom][index][help] */
     /* Try to start every subdaemon in pcmk_children[] that is flagged
      * child_needs_retry and is not currently tracked (pid == 0).
      *
      * The child_needs_retry flag is cleared only when start_child() returns
      * nonzero; otherwise it stays set, so the entry is considered again on
      * the next call.
      */
 842 {
 843     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
             /* Skip entries that need no retry or that already have a pid */
 844         if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
 845             continue;
 846         }
 847 
 848         crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
 849         if (start_child(&pcmk_children[i])) {
 850             pcmk_children[i].flags &= ~child_needs_retry;
 851         }
 852     }
 853 }
 854 
 855 static gboolean
 856 stop_child(pcmk_child_t * child, int signal)
     /* [previous][next][first][last][top][bottom][index][help] */
     /* Send a signal to a subdaemon's process.
      *
      * child:  subdaemon entry; its command, pid and name are read.
      * signal: signal to send; 0 selects SIGTERM.
      *
      * Nothing is sent when the child has no command, when its pid equals
      * PCMK__SPECIAL_PID, or when its pid is <= 0.
      *
      * Always returns TRUE, including when kill() fails (the failure is only
      * logged).
      */
 857 {
 858     if (signal == 0) {
 859         signal = SIGTERM;
 860     }
 861 
 862     /* Why skip a PID of 1?
 863        - FreeBSD: it is how an untrackable process behind IPC is masqueraded
 864        - elsewhere: it is how the "init" task is designated; in particular,
 865          with systemd socket-based activation, this is a very real case */
 866     if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
 867         crm_debug("Nothing to do for child \"%s\" (process %lld)",
 868                   child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
 869         return TRUE;
 870     }
 871 
         /* No process is recorded for this child, so there is nothing to signal */
 872     if (child->pid <= 0) {
 873         crm_trace("Client %s not running", child->name);
 874         return TRUE;
 875     }
 876 
         /* errno is cleared first; if kill() fails, the errno it sets is what
          * strerror() reports in the error message below
          */
 877     errno = 0;
 878     if (kill(child->pid, signal) == 0) {
 879         crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
 880                    child->name, signal, (long long) child->pid);
 881 
 882     } else {
 883         crm_err("Could not stop %s (process %lld) with signal %d: %s",
 884                 child->name, (long long) child->pid, signal, strerror(errno));
 885     }
 886 
 887     return TRUE;
 888 }
 889 

/* [previous][next][first][last][top][bottom][index][help] */