root/daemons/pacemakerd/pcmkd_subdaemons.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. subdaemon_path
  2. pcmkd_cluster_connected
  3. check_next_subdaemon
  4. escalate_shutdown
  5. pcmk_child_exit
  6. pcmk_process_exit
  7. pcmk_shutdown_worker
  8. start_child
  9. child_liveness
  10. find_and_track_existing_processes
  11. init_children_processes
  12. pcmk_shutdown
  13. restart_cluster_subdaemons
  14. stop_child

   1 /*
   2  * Copyright 2010-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #if SUPPORT_COROSYNC
  14 #include "pcmkd_corosync.h"
  15 #endif
  16 
  17 #include <errno.h>
  18 #include <grp.h>
  19 #include <signal.h>
  20 #include <stdbool.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23 #include <string.h>
  24 #include <sys/types.h>
  25 #include <time.h>
  26 #include <unistd.h>
  27 
  28 #include <crm/cluster.h>
  29 #include <crm/common/xml.h>
  30 
  31 enum child_daemon_flags {
  32     child_none                  = 0,
  33     child_respawn               = 1 << 0,
  34     child_needs_cluster         = 1 << 1,
  35     child_needs_retry           = 1 << 2,
  36     child_active_before_startup = 1 << 3,
  37 };
  38 
  39 typedef struct pcmk_child_s {
  40     enum pcmk_ipc_server server;
  41     pid_t pid;
  42     int respawn_count;
  43     const char *uid;
  44     int check_count;
  45     uint32_t flags;
  46 } pcmk_child_t;
  47 
  48 #define PCMK_PROCESS_CHECK_INTERVAL 1000    /* 1s */
  49 #define PCMK_PROCESS_CHECK_RETRIES  5
  50 #define SHUTDOWN_ESCALATION_PERIOD  180000  /* 3m */
  51 
  52 /* Index into the array below */
  53 #define PCMK_CHILD_CONTROLD  5
  54 
  55 static pcmk_child_t pcmk_children[] = {
  56     {
  57         pcmk_ipc_based, 0, 0, CRM_DAEMON_USER,
  58         0, child_respawn | child_needs_cluster
  59     },
  60     {
  61         pcmk_ipc_fenced, 0, 0, NULL,
  62         0, child_respawn | child_needs_cluster
  63     },
  64     {
  65         pcmk_ipc_execd, 0, 0, NULL,
  66         0, child_respawn
  67     },
  68     {
  69         pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER,
  70         0, child_respawn | child_needs_cluster
  71     },
  72     {
  73         pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER,
  74         0, child_respawn
  75     },
  76     {
  77         pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER,
  78         0, child_respawn | child_needs_cluster
  79     },
  80 };
  81 
  82 static char *opts_default[] = { NULL, NULL };
  83 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
  84 
  85 crm_trigger_t *shutdown_trigger = NULL;
  86 crm_trigger_t *startup_trigger = NULL;
  87 time_t subdaemon_check_progress = 0;
  88 
  89 // Whether we need root group access to talk to cluster layer
  90 static bool need_root_group = true;
  91 
  92 /* When contacted via pacemakerd-api by a client having sbd in
  93  * the name we assume it is sbd-daemon which wants to know
  94  * if pacemakerd shutdown gracefully.
  95  * Thus when everything is shutdown properly pacemakerd
  96  * waits till it has reported the graceful completion of
  97  * shutdown to sbd and just when sbd-client closes the
  98  * connection we can assume that the report has arrived
  99  * properly so that pacemakerd can finally exit.
 100  * Following two variables are used to track that handshake.
 101  */
 102 unsigned int shutdown_complete_state_reported_to = 0;
 103 gboolean shutdown_complete_state_reported_client_closed = FALSE;
 104 
 105 /* state we report when asked via pacemakerd-api status-ping */
 106 const char *pacemakerd_state = PCMK__VALUE_INIT;
 107 gboolean running_with_sbd = FALSE; /* local copy */
 108 
 109 GMainLoop *mainloop = NULL;
 110 
 111 static gboolean fatal_error = FALSE;
 112 
 113 static int child_liveness(pcmk_child_t *child);
 114 static gboolean escalate_shutdown(gpointer data);
 115 static int start_child(pcmk_child_t * child);
 116 static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
 117 static void pcmk_process_exit(pcmk_child_t * child);
 118 static gboolean pcmk_shutdown_worker(gpointer user_data);
 119 static gboolean stop_child(pcmk_child_t * child, int signal);
 120 
 121 /*!
 122  * \internal
 123  * \brief Get path to subdaemon executable
 124  *
 125  * \param[in] subdaemon  Subdaemon to get path for
 126  *
 127  * \return Newly allocated string with path to subdaemon executable
 128  * \note It is the caller's responsibility to free() the return value
 129  */
 130 static inline char *
 131 subdaemon_path(pcmk_child_t *subdaemon)
     /* [previous][next][first][last][top][bottom][index][help] */
 132 {
 133     return crm_strdup_printf(CRM_DAEMON_DIR "/%s",
 134                              pcmk__server_name(subdaemon->server));
 135 }
 136 
 137 static bool
 138 pcmkd_cluster_connected(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 139 {
 140 #if SUPPORT_COROSYNC
 141     return pcmkd_corosync_connected();
 142 #else
 143     return true;
 144 #endif
 145 }
 146 
 147 static gboolean
 148 check_next_subdaemon(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 149 {
 150     static int next_child = 0;
 151 
 152     pcmk_child_t *child = &(pcmk_children[next_child]);
 153     const char *name = pcmk__server_name(child->server);
 154     const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid);
 155     int rc = child_liveness(child);
 156 
 157     crm_trace("Checked subdaemon %s[%lld]: %s (%d)",
 158               name, pid, pcmk_rc_str(rc), rc);
 159 
 160     switch (rc) {
 161         case pcmk_rc_ok:
 162             child->check_count = 0;
 163             subdaemon_check_progress = time(NULL);
 164             break;
 165 
 166         case pcmk_rc_ipc_pid_only: // Child was previously OK
 167             if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) {
 168                 // cts-lab looks for this message
 169                 crm_crit("Subdaemon %s[%lld] is unresponsive to IPC "
 170                          "after %d attempt%s and will now be killed",
 171                          name, pid, child->check_count,
 172                          pcmk__plural_s(child->check_count));
 173                 stop_child(child, SIGKILL);
 174                 if (pcmk_is_set(child->flags, child_respawn)) {
 175                     // Respawn limit hasn't been reached, so retry another round
 176                     child->check_count = 0;
 177                 }
 178             } else {
 179                 crm_notice("Subdaemon %s[%lld] is unresponsive to IPC "
 180                            "after %d attempt%s (will recheck later)",
 181                            name, pid, child->check_count,
 182                            pcmk__plural_s(child->check_count));
 183                 if (pcmk_is_set(child->flags, child_respawn)) {
 184                     /* as long as the respawn-limit isn't reached
 185                        and we haven't run out of connect retries
 186                        we account this as progress we are willing
 187                        to tell to sbd
 188                      */
 189                     subdaemon_check_progress = time(NULL);
 190                 }
 191             }
 192             /* go to the next child and see if
 193                we can make progress there
 194              */
 195             break;
 196         case pcmk_rc_ipc_unresponsive:
 197             if (!pcmk_is_set(child->flags, child_respawn)) {
 198                 /* if a subdaemon is down and we don't want it
 199                    to be restarted this is a success during
 200                    shutdown. if it isn't restarted anymore
 201                    due to MAX_RESPAWN it is
 202                    rather no success.
 203                  */
 204                 if (child->respawn_count <= MAX_RESPAWN) {
 205                     subdaemon_check_progress = time(NULL);
 206                 }
 207             }
 208             if (!pcmk_is_set(child->flags, child_active_before_startup)) {
 209                 crm_trace("Subdaemon %s[%lld] terminated", name, pid);
 210                 break;
 211             }
 212             if (pcmk_is_set(child->flags, child_respawn)) {
 213                 // cts-lab looks for this message
 214                 crm_err("Subdaemon %s[%lld] terminated", name, pid);
 215             } else {
 216                 /* orderly shutdown */
 217                 crm_notice("Subdaemon %s[%lld] terminated", name, pid);
 218             }
 219             pcmk_process_exit(child);
 220             break;
 221         default:
 222             crm_exit(CRM_EX_FATAL);
 223             break;  /* static analysis/noreturn */
 224     }
 225 
 226     if (++next_child >= PCMK__NELEM(pcmk_children)) {
 227         next_child = 0;
 228     }
 229 
 230     return G_SOURCE_CONTINUE;
 231 }
 232 
 233 static gboolean
 234 escalate_shutdown(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 235 {
 236     pcmk_child_t *child = data;
 237 
 238     if (child->pid == PCMK__SPECIAL_PID) {
 239         pcmk_process_exit(child);
 240 
 241     } else if (child->pid != 0) {
 242         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
 243         crm_err("Subdaemon %s not terminating in a timely manner, forcing",
 244                 pcmk__server_name(child->server));
 245         stop_child(child, SIGSEGV);
 246     }
 247     return FALSE;
 248 }
 249 
 250 static void
 251 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 252 {
 253     pcmk_child_t *child = mainloop_child_userdata(p);
 254     const char *name = mainloop_child_name(p);
 255 
 256     if (signo) {
 257         // cts-lab looks for this message
 258         do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
 259                    "%s[%d] terminated with signal %d (%s)%s",
 260                    name, pid, signo, strsignal(signo),
 261                    (core? " and dumped core" : ""));
 262 
 263     } else {
 264         switch(exitcode) {
 265             case CRM_EX_OK:
 266                 crm_info("%s[%d] exited with status %d (%s)",
 267                          name, pid, exitcode, crm_exit_str(exitcode));
 268                 break;
 269 
 270             case CRM_EX_FATAL:
 271                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
 272                          name, pid);
 273                 child->flags &= ~child_respawn;
 274                 fatal_error = TRUE;
 275                 pcmk_shutdown(SIGTERM);
 276                 break;
 277 
 278             case CRM_EX_PANIC:
 279                 {
 280                     char *msg = NULL;
 281 
 282                     child->flags &= ~child_respawn;
 283                     fatal_error = TRUE;
 284                     msg = crm_strdup_printf("Subdaemon %s[%d] requested panic",
 285                                             name, pid);
 286                     pcmk__panic(msg);
 287 
 288                     // Should never get here
 289                     free(msg);
 290                     pcmk_shutdown(SIGTERM);
 291                 }
 292                 break;
 293 
 294             default:
 295                 // cts-lab looks for this message
 296                 crm_err("%s[%d] exited with status %d (%s)",
 297                         name, pid, exitcode, crm_exit_str(exitcode));
 298                 break;
 299         }
 300     }
 301 
 302     pcmk_process_exit(child);
 303 }
 304 
 305 static void
 306 pcmk_process_exit(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 307 {
 308     const char *name = pcmk__server_name(child->server);
 309     child->pid = 0;
 310     child->flags &= ~child_active_before_startup;
 311     child->check_count = 0;
 312 
 313     child->respawn_count += 1;
 314     if (child->respawn_count > MAX_RESPAWN) {
 315         crm_err("Subdaemon %s exceeded maximum respawn count", name);
 316         child->flags &= ~child_respawn;
 317     }
 318 
 319     if (shutdown_trigger) {
 320         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
 321         mainloop_set_trigger(shutdown_trigger);
 322 
 323     } else if (!pcmk_is_set(child->flags, child_respawn)) {
 324         /* nothing to do */
 325 
 326     } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
 327         pcmk__panic("Subdaemon failed");
 328 
 329     } else if (child_liveness(child) == pcmk_rc_ok) {
 330         crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK",
 331                  name, pcmk__server_ipc_name(child->server));
 332 
 333     } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) {
 334         crm_notice("Not respawning subdaemon %s until cluster returns", name);
 335         child->flags |= child_needs_retry;
 336 
 337     } else {
 338         // cts-lab looks for this message
 339         crm_notice("Respawning subdaemon %s after unexpected exit", name);
 340         start_child(child);
 341     }
 342 }
 343 
 344 static gboolean
 345 pcmk_shutdown_worker(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 346 {
 347     static int phase = PCMK__NELEM(pcmk_children) - 1;
 348     static time_t next_log = 0;
 349 
 350     if (phase == PCMK__NELEM(pcmk_children) - 1) {
 351         crm_notice("Shutting down Pacemaker");
 352         pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN;
 353     }
 354 
 355     for (; phase >= 0; phase--) {
 356         pcmk_child_t *child = &(pcmk_children[phase]);
 357         const char *name = pcmk__server_name(child->server);
 358 
 359         if (child->pid != 0) {
 360             time_t now = time(NULL);
 361 
 362             if (pcmk_is_set(child->flags, child_respawn)) {
 363                 if (child->pid == PCMK__SPECIAL_PID) {
 364                     crm_warn("Subdaemon %s cannot be terminated (shutdown "
 365                              "will be escalated after %ld seconds if it does "
 366                              "not terminate on its own; set PCMK_"
 367                              PCMK__ENV_FAIL_FAST "=1 to exit immediately "
 368                              "instead)",
 369                              name, (long) SHUTDOWN_ESCALATION_PERIOD);
 370                 }
 371                 next_log = now + 30;
 372                 child->flags &= ~child_respawn;
 373                 stop_child(child, SIGTERM);
 374                 if (phase < PCMK_CHILD_CONTROLD) {
 375                     pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD,
 376                                        escalate_shutdown, child);
 377                 }
 378 
 379             } else if (now >= next_log) {
 380                 next_log = now + 30;
 381                 crm_notice("Still waiting for subdaemon %s to terminate "
 382                            QB_XS " pid=%lld", name, (long long) child->pid);
 383             }
 384             return TRUE;
 385         }
 386 
 387         /* cleanup */
 388         crm_debug("Subdaemon %s confirmed stopped", name);
 389         child->pid = 0;
 390     }
 391 
 392     crm_notice("Shutdown complete");
 393     pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE;
 394     if (!fatal_error && running_with_sbd &&
 395         pcmk__get_sbd_sync_resource_startup() &&
 396         !shutdown_complete_state_reported_client_closed) {
 397         crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
 398         return TRUE;
 399     }
 400 
 401     g_main_loop_quit(mainloop);
 402 
 403     if (fatal_error) {
 404         crm_notice("Shutting down and staying down after fatal error");
 405 #if SUPPORT_COROSYNC
 406         pcmkd_shutdown_corosync();
 407 #endif
 408         crm_exit(CRM_EX_FATAL);
 409     }
 410 
 411     return TRUE;
 412 }
 413 
 414 /* TODO once libqb is taught to juggle with IPC end-points carried over as
 415         bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
 416         it shall hand over these descriptors here if/once they are successfully
 417         pre-opened in (presumably) child_liveness(), to avoid any remaining
 418         room for races */
 419  // \return Standard Pacemaker return code
 420 static int
 421 start_child(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 422 {
 423     uid_t uid = 0;
 424     gid_t gid = 0;
 425     gboolean use_valgrind = FALSE;
 426     gboolean use_callgrind = FALSE;
 427     const char *name = pcmk__server_name(child->server);
 428     const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED);
 429     const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED);
 430 
 431     child->flags &= ~child_active_before_startup;
 432     child->check_count = 0;
 433 
 434     if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
 435         use_callgrind = TRUE;
 436         use_valgrind = TRUE;
 437 
 438     } else if ((env_callgrind != NULL)
 439                && (strstr(env_callgrind, name) != NULL)) {
 440         use_callgrind = TRUE;
 441         use_valgrind = TRUE;
 442 
 443     } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
 444         use_valgrind = TRUE;
 445 
 446     } else if ((env_valgrind != NULL)
 447                && (strstr(env_valgrind, name) != NULL)) {
 448         use_valgrind = TRUE;
 449     }
 450 
 451     if (use_valgrind && strlen(PCMK__VALGRIND_EXEC) == 0) {
 452         crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found",
 453                  name);
 454         use_valgrind = FALSE;
 455     }
 456 
 457     if ((child->uid != NULL) && (crm_user_lookup(child->uid, &uid, &gid) < 0)) {
 458         crm_err("Invalid user (%s) for subdaemon %s: not found",
 459                 child->uid, name);
 460         return EACCES;
 461     }
 462 
 463     child->pid = fork();
 464     pcmk__assert(child->pid != -1);
 465 
 466     if (child->pid > 0) {
 467         /* parent */
 468         mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit);
 469 
 470         if (use_valgrind) {
 471             crm_info("Forked process %lld using user %lu (%s) and group %lu "
 472                      "for subdaemon %s (valgrind enabled: %s)",
 473                      (long long) child->pid, (unsigned long) uid,
 474                      pcmk__s(child->uid, "root"), (unsigned long) gid, name,
 475                      PCMK__VALGRIND_EXEC);
 476         } else {
 477             crm_info("Forked process %lld using user %lu (%s) and group %lu "
 478                      "for subdaemon %s",
 479                      (long long) child->pid, (unsigned long) uid,
 480                      pcmk__s(child->uid, "root"), (unsigned long) gid, name);
 481         }
 482 
 483         return pcmk_rc_ok;
 484 
 485     } else {
 486         /* Start a new session */
 487         (void)setsid();
 488 
 489         /* Setup the two alternate arg arrays */
 490         opts_vgrind[0] = pcmk__str_copy(PCMK__VALGRIND_EXEC);
 491         if (use_callgrind) {
 492             opts_vgrind[1] = pcmk__str_copy("--tool=callgrind");
 493             opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file="
 494                                             CRM_STATE_DIR "/callgrind.out.%p");
 495             opts_vgrind[3] = subdaemon_path(child);
 496             opts_vgrind[4] = NULL;
 497         } else {
 498             opts_vgrind[1] = subdaemon_path(child);
 499             opts_vgrind[2] = NULL;
 500             opts_vgrind[3] = NULL;
 501             opts_vgrind[4] = NULL;
 502         }
 503         opts_default[0] = subdaemon_path(child);
 504 
 505         if(gid) {
 506             // Drop root group access if not needed
 507             if (!need_root_group && (setgid(gid) < 0)) {
 508                 crm_warn("Could not set subdaemon %s group to %lu: %s",
 509                          name, (unsigned long) gid, strerror(errno));
 510             }
 511 
 512             /* Initialize supplementary groups to only those always granted to
 513              * the user, plus haclient (so we can access IPC).
 514              */
 515             if (initgroups(child->uid, gid) < 0) {
 516                 crm_err("Cannot initialize system groups for subdaemon %s: %s "
 517                         QB_XS " errno=%d",
 518                         name, pcmk_rc_str(errno), errno);
 519             }
 520         }
 521 
 522         if (uid && setuid(uid) < 0) {
 523             crm_warn("Could not set subdaemon %s user to %s: %s "
 524                      QB_XS " uid=%lu errno=%d",
 525                      name, strerror(errno), child->uid, (unsigned long) uid,
 526                      errno);
 527         }
 528 
 529         pcmk__close_fds_in_child(true);
 530 
 531         pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
 532         pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
 533         pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
 534 
 535         if (use_valgrind) {
 536             (void)execvp(PCMK__VALGRIND_EXEC, opts_vgrind);
 537         } else {
 538             char *path = subdaemon_path(child);
 539 
 540             (void) execvp(path, opts_default);
 541             free(path);
 542         }
 543         crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno));
 544         crm_exit(CRM_EX_FATAL);
 545     }
 546     return pcmk_rc_ok;          /* never reached */
 547 }
 548 
 549 /*!
 550  * \internal
 551  * \brief Check the liveness of the child based on IPC name and PID if tracked
 552  *
 553  * \param[in,out] child  Child tracked data
 554  *
 555  * \return Standard Pacemaker return code
 556  *
 557  * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 558  *       indicating that no trace of IPC liveness was detected,
 559  *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
 560  *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
 561  *       the child is up by PID but not IPC end-point (possibly starting).
 562  * \note This function doesn't modify any of \p child members but \c pid,
 563  *       and is not actively toying with processes as such but invoking
 564  *       \c stop_child in one particular case (there's for some reason
 565  *       a different authentic holder of the IPC end-point).
 566  */
 567 static int
 568 child_liveness(pcmk_child_t *child)
     /* [previous][next][first][last][top][bottom][index][help] */
 569 {
 570     uid_t cl_uid = 0;
 571     gid_t cl_gid = 0;
 572     const uid_t root_uid = 0;
 573     const gid_t root_gid = 0;
 574     const uid_t *ref_uid;
 575     const gid_t *ref_gid;
 576     const char *name = pcmk__server_name(child->server);
 577     int rc = pcmk_rc_ipc_unresponsive;
 578     int legacy_rc = pcmk_ok;
 579     pid_t ipc_pid = 0;
 580 
 581     if (child->uid == NULL) {
 582         ref_uid = &root_uid;
 583         ref_gid = &root_gid;
 584     } else {
 585         ref_uid = &cl_uid;
 586         ref_gid = &cl_gid;
 587         legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
 588     }
 589 
 590     if (legacy_rc < 0) {
 591         rc = pcmk_legacy2rc(legacy_rc);
 592         crm_err("Could not find user and group IDs for user %s: %s "
 593                 QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
 594     } else {
 595         const char *ipc_name = pcmk__server_ipc_name(child->server);
 596 
 597         rc = pcmk__ipc_is_authentic_process_active(ipc_name,
 598                                                    *ref_uid, *ref_gid,
 599                                                    &ipc_pid);
 600         if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
 601             if (child->pid <= 0) {
 602                 /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
 603                  * initializes a new child. If rc is
 604                  * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
 605                  * investigate further.
 606                  */
 607                 child->pid = ipc_pid;
 608             } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
 609                 /* An unexpected (but authorized) process is responding to
 610                  * IPC. Investigate further.
 611                  */
 612                 rc = pcmk_rc_ipc_unresponsive;
 613             }
 614         }
 615     }
 616 
 617     if (rc == pcmk_rc_ipc_unresponsive) {
 618         /* If we get here, a child without IPC is being tracked, no IPC liveness
 619          * has been detected, or IPC liveness has been detected with an
 620          * unexpected (but authorized) process. This is safe on FreeBSD since
 621          * the only change possible from a proper child's PID into "special" PID
 622          * of 1 behind more loosely related process.
 623          */
 624         int ret = pcmk__pid_active(child->pid, name);
 625 
 626         if (ipc_pid && ((ret != pcmk_rc_ok)
 627                         || ipc_pid == PCMK__SPECIAL_PID
 628                         || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) {
 629             /* An unexpected (but authorized) process was detected at the IPC
 630              * endpoint, and either it is active, or the child we're tracking is
 631              * not.
 632              */
 633 
 634             if (ret == pcmk_rc_ok) {
 635                 /* The child we're tracking is active. Kill it, and adopt the
 636                  * detected process. This assumes that our children don't fork
 637                  * (thus getting a different PID owning the IPC), but rather the
 638                  * tracking got out of sync because of some means external to
 639                  * Pacemaker, and adopting the detected process is better than
 640                  * killing it and possibly having to spawn a new child.
 641                  */
 642                 /* not possessing IPC, afterall (what about corosync CPG?) */
 643                 stop_child(child, SIGKILL);
 644             }
 645             rc = pcmk_rc_ok;
 646             child->pid = ipc_pid;
 647         } else if (ret == pcmk_rc_ok) {
 648             // Our tracked child's PID was found active, but not its IPC
 649             rc = pcmk_rc_ipc_pid_only;
 650         } else if ((child->pid == 0) && (ret == EINVAL)) {
 651             // FreeBSD can return EINVAL
 652             rc = pcmk_rc_ipc_unresponsive;
 653         } else {
 654             switch (ret) {
 655                 case EACCES:
 656                     rc = pcmk_rc_ipc_unauthorized;
 657                     break;
 658                 case ESRCH:
 659                     rc = pcmk_rc_ipc_unresponsive;
 660                     break;
 661                 default:
 662                     rc = ret;
 663                     break;
 664             }
 665         }
 666     }
 667     return rc;
 668 }
 669 
 670 /*!
 671  * \internal
 672  * \brief Initial one-off check of the pre-existing "child" processes
 673  *
 674  * With "child" process, we mean the subdaemon that defines an API end-point
 675  * (all of them do as of the comment) -- the possible complement is skipped
 676  * as it is deemed it has no such shared resources to cause conflicts about,
 677  * hence it can presumably be started anew without hesitation.
 678  * If that won't hold true in the future, the concept of a shared resource
 679  * will have to be generalized beyond the API end-point.
 680  *
 681  * For boundary cases that the "child" is still starting (IPC end-point is yet
 682  * to be witnessed), or more rarely (practically FreeBSD only), when there's
 683  * a pre-existing "untrackable" authentic process, we give the situation some
 684  * time to possibly unfold in the right direction, meaning that said socket
 685  * will appear or the unattainable process will disappear per the observable
 686  * IPC, respectively.
 687  *
 688  * \return Standard Pacemaker return code
 689  *
 690  * \note Since this gets run at the very start, \c respawn_count fields
 691  *       for particular children get temporarily overloaded with "rounds
 692  *       of waiting" tracking, restored once we are about to finish with
 693  *       success (i.e. returning value >=0) and will remain unrestored
 694  *       otherwise.  One way to suppress liveness detection logic for
 695  *       particular child is to set the said value to a negative number.
 696  */
 697 #define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
 698 int
 699 find_and_track_existing_processes(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 700 {
 701     bool wait_in_progress;
 702     int rc;
 703     size_t i, rounds;
 704 
 705     for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
 706         wait_in_progress = false;
 707         for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 708             const char *name = pcmk__server_name(pcmk_children[i].server);
 709             const char *ipc_name = NULL;
 710 
 711             if (pcmk_children[i].respawn_count < 0) {
 712                 continue;
 713             }
 714 
 715             rc = child_liveness(&pcmk_children[i]);
 716             if (rc == pcmk_rc_ipc_unresponsive) {
 717                 /* As a speculation, don't give up if there are more rounds to
 718                  * come for other reasons, but don't artificially wait just
 719                  * because of this, since we would preferably start ASAP.
 720                  */
 721                 continue;
 722             }
 723 
 724             // @TODO Functionize more of this to reduce nesting
 725             ipc_name = pcmk__server_ipc_name(pcmk_children[i].server);
 726             pcmk_children[i].respawn_count = rounds;
 727             switch (rc) {
 728                 case pcmk_rc_ok:
 729                     if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
 730                         if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) {
 731                             crm_crit("Cannot reliably track pre-existing"
 732                                      " authentic process behind %s IPC on this"
 733                                      " platform and PCMK_" PCMK__ENV_FAIL_FAST
 734                                      " requested", ipc_name);
 735                             return EOPNOTSUPP;
 736                         } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 737                             crm_notice("Assuming pre-existing authentic, though"
 738                                        " on this platform untrackable, process"
 739                                        " behind %s IPC is stable (was in %d"
 740                                        " previous samples) so rather than"
 741                                        " bailing out (PCMK_" PCMK__ENV_FAIL_FAST
 742                                        " not requested), we just switch to a"
 743                                        " less optimal IPC liveness monitoring"
 744                                        " (not very suitable for heavy load)",
 745                                        name, WAIT_TRIES - 1);
 746                             crm_warn("The process behind %s IPC cannot be"
 747                                      " terminated, so the overall shutdown"
 748                                      " will get delayed implicitly (%ld s),"
 749                                      " which serves as a graceful period for"
 750                                      " its native termination if it vitally"
 751                                      " depends on some other daemons going"
 752                                      " down in a controlled way already",
 753                                      name, (long) SHUTDOWN_ESCALATION_PERIOD);
 754                         } else {
 755                             wait_in_progress = true;
 756                             crm_warn("Cannot reliably track pre-existing"
 757                                      " authentic process behind %s IPC on this"
 758                                      " platform, can still disappear in %d"
 759                                      " attempt(s)", ipc_name,
 760                                      WAIT_TRIES - pcmk_children[i].respawn_count);
 761                             continue;
 762                         }
 763                     }
 764                     crm_notice("Tracking existing %s process (pid=%lld)",
 765                                name,
 766                                (long long) PCMK__SPECIAL_PID_AS_0(
 767                                                pcmk_children[i].pid));
 768                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
 769                     pcmk_children[i].flags |= child_active_before_startup;
 770                     break;
 771                 case pcmk_rc_ipc_pid_only:
 772                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 773                         crm_crit("%s IPC endpoint for existing authentic"
 774                                  " process %lld did not (re)appear",
 775                                  ipc_name,
 776                                  (long long) PCMK__SPECIAL_PID_AS_0(
 777                                                  pcmk_children[i].pid));
 778                         return rc;
 779                     }
 780                     wait_in_progress = true;
 781                     crm_warn("Cannot find %s IPC endpoint for existing"
 782                              " authentic process %lld, can still (re)appear"
 783                              " in %d attempts (?)",
 784                              ipc_name,
 785                              (long long) PCMK__SPECIAL_PID_AS_0(
 786                                              pcmk_children[i].pid),
 787                              WAIT_TRIES - pcmk_children[i].respawn_count);
 788                     continue;
 789                 default:
 790                     crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d",
 791                              name, pcmk_rc_str(rc), rc);
 792                     return rc;
 793             }
 794         }
 795         if (!wait_in_progress) {
 796             break;
 797         }
 798         pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
 799     }
 800     for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 801         pcmk_children[i].respawn_count = 0;  /* restore pristine state */
 802     }
 803 
 804     pcmk__create_timer(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon,
 805                        NULL);
 806     return pcmk_rc_ok;
 807 }
 808 
 809 gboolean
 810 init_children_processes(void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 811 {
 812     if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
 813         /* Corosync clusters can drop root group access, because we set
 814          * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect
 815          * to corosync.
 816          */
 817         need_root_group = false;
 818     }
 819 
 820     /* start any children that have not been detected */
 821     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 822         if (pcmk_children[i].pid != 0) {
 823             /* we are already tracking it */
 824             continue;
 825         }
 826 
 827         start_child(&(pcmk_children[i]));
 828     }
 829 
 830     /* From this point on, any daemons being started will be due to
 831      * respawning rather than node start.
 832      *
 833      * This may be useful for the daemons to know
 834      */
 835     pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false);
 836     pacemakerd_state = PCMK__VALUE_RUNNING;
 837     return TRUE;
 838 }
 839 
 840 void
 841 pcmk_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 842 {
 843     if (shutdown_trigger == NULL) {
 844         shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
 845     }
 846     mainloop_set_trigger(shutdown_trigger);
 847 }
 848 
 849 void
 850 restart_cluster_subdaemons(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 851 {
 852     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 853         if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) {
 854             continue;
 855         }
 856 
 857         crm_notice("Respawning cluster-based subdaemon %s",
 858                    pcmk__server_name(pcmk_children[i].server));
 859         if (start_child(&pcmk_children[i])) {
 860             pcmk_children[i].flags &= ~child_needs_retry;
 861         }
 862     }
 863 }
 864 
 865 static gboolean
 866 stop_child(pcmk_child_t * child, int signal)
     /* [previous][next][first][last][top][bottom][index][help] */
 867 {
 868     const char *name = pcmk__server_name(child->server);
 869 
 870     if (signal == 0) {
 871         signal = SIGTERM;
 872     }
 873 
 874     /* why to skip PID of 1?
 875        - FreeBSD ~ how untrackable process behind IPC is masqueraded as
 876        - elsewhere: how "init" task is designated; in particular, in systemd
 877          arrangement of socket-based activation, this is pretty real */
 878     if (child->pid == PCMK__SPECIAL_PID) {
 879         crm_debug("Nothing to do to stop subdaemon %s[%lld]",
 880                   name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
 881         return TRUE;
 882     }
 883 
 884     if (child->pid <= 0) {
 885         crm_trace("Nothing to do to stop subdaemon %s: Not running", name);
 886         return TRUE;
 887     }
 888 
 889     errno = 0;
 890     if (kill(child->pid, signal) == 0) {
 891         crm_notice("Stopping subdaemon %s "
 892                    QB_XS " via signal %d to process %lld",
 893                    name, signal, (long long) child->pid);
 894     } else {
 895         crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s",
 896                 name, (long long) child->pid, signal, strerror(errno));
 897     }
 898 
 899     return TRUE;
 900 }

/* [previous][next][first][last][top][bottom][index][help] */