daemons/pacemakerd/pcmkd

/* */
This source file includes the following definitions.
pcmkd_cluster_connected
check_active_before_startup_processes
escalate_shutdown
pcmk_child_exit
pcmk_process_exit
pcmk_shutdown_worker
start_child
child_liveness
find_and_track_existing_processes
init_children_processes
pcmk_shutdown
restart_cluster_subdaemons
stop_child
   1 /*
   2  * Copyright 2010-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #include <errno.h>
  14 #include <grp.h>
  15 #include <signal.h>
  16 #include <stdbool.h>
  17 #include <stdlib.h>
  18 #include <string.h>
  19 #include <sys/types.h>
  20 #include <time.h>
  21 #include <unistd.h>
  22 
  23 #include <crm/cluster.h>
  24 #include <crm/msg_xml.h>
  25 
  26 typedef struct pcmk_child_s {
  27     pid_t pid;
  28     int respawn_count;
  29     bool respawn;
  30     const char *name;
  31     const char *uid;
  32     const char *command;
  33     const char *endpoint;  /* IPC server name */
  34     bool needs_cluster;
  35 
  36     /* Anything below here will be dynamically initialized */
  37     bool needs_retry;
  38     bool active_before_startup;
  39 } pcmk_child_t;
  40 
  41 #define PCMK_PROCESS_CHECK_INTERVAL 5
  42 #define SHUTDOWN_ESCALATION_PERIOD 180000  /* 3m */
  43 
  44 /* Index into the array below */
  45 #define PCMK_CHILD_CONTROLD  5
  46 
  47 static pcmk_child_t pcmk_children[] = {
  48     {
  49         0, 0, true,  "pacemaker-based", CRM_DAEMON_USER,
  50         CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO,
  51         true
  52     },
  53     {
  54         0, 0, true, "pacemaker-fenced", NULL,
  55         CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng",
  56         true
  57     },
  58     {
  59         0, 0, true,  "pacemaker-execd", NULL,
  60         CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD,
  61         false
  62     },
  63     {
  64         0, 0, true, "pacemaker-attrd", CRM_DAEMON_USER,
  65         CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD,
  66         true
  67     },
  68     {
  69         0, 0, true, "pacemaker-schedulerd", CRM_DAEMON_USER,
  70         CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE,
  71         false
  72     },
  73     {
  74         0, 0, true, "pacemaker-controld", CRM_DAEMON_USER,
  75         CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD,
  76         true
  77     },
  78 };
  79 
/* Argument vectors handed to execvp() by start_child(); their slots are
 * filled in the forked child just before exec.
 */
static char *opts_default[] = { NULL, NULL };
static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };

/* Created by pcmk_shutdown(); while non-NULL, every subdaemon exit re-arms it
 * so that pcmk_shutdown_worker() proceeds with the next subdaemon.
 */
crm_trigger_t *shutdown_trigger = NULL;
/* Not referenced by the code visible in this file */
crm_trigger_t *startup_trigger = NULL;

/* When contacted via the pacemakerd API by a client with "sbd" in its name,
 * we assume it is the sbd daemon wanting to know whether pacemakerd shut
 * down gracefully.
 * Thus, once everything has been shut down properly, pacemakerd waits until
 * it has reported the graceful completion of shutdown to sbd. Only when the
 * sbd client closes the connection can we assume that the report has arrived,
 * so that pacemakerd can finally exit.
 * The following two variables are used to track that handshake.
 */
unsigned int shutdown_complete_state_reported_to = 0;
gboolean shutdown_complete_state_reported_client_closed = FALSE;

/* State we report when asked via a pacemakerd API status ping */
const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
gboolean running_with_sbd = FALSE; /* local copy */

GMainLoop *mainloop = NULL;

/* Set when a subdaemon exits with CRM_EX_FATAL or CRM_EX_PANIC; makes
 * pcmk_shutdown_worker() exit with CRM_EX_FATAL once shutdown completes.
 */
static gboolean fatal_error = FALSE;

/* Mirrors the last result of check_active_before_startup_processes(), and is
 * set when pcmk_process_exit() schedules that check, so that the recurring
 * timer is not added a second time.
 */
static bool global_keep_tracking = false;

/* Forward declarations of the file-local helpers defined below */
static gboolean check_active_before_startup_processes(gpointer user_data);
static int child_liveness(pcmk_child_t *child);
static gboolean escalate_shutdown(gpointer data);
static int start_child(pcmk_child_t * child);
static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
static void pcmk_process_exit(pcmk_child_t * child);
static gboolean pcmk_shutdown_worker(gpointer user_data);
static gboolean stop_child(pcmk_child_t * child, int signal);
 116 
 117 static bool
 118 pcmkd_cluster_connected(void)
     /*  */
 119 {
 120 #if SUPPORT_COROSYNC
 121     return pcmkd_corosync_connected();
 122 #else
 123     return true;
 124 #endif
 125 }
 126 
 127 static gboolean
 128 check_active_before_startup_processes(gpointer user_data)
     /*  */
 129 {
 130     gboolean keep_tracking = FALSE;
 131 
 132     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 133         if (!pcmk_children[i].active_before_startup) {
 134             /* we are already tracking it as a child process. */
 135             continue;
 136         } else {
 137             int rc = child_liveness(&pcmk_children[i]);
 138 
 139             switch (rc) {
 140                 case pcmk_rc_ok:
 141                     break;
 142                 case pcmk_rc_ipc_unresponsive:
 143                 case pcmk_rc_ipc_pid_only: // This case: it was previously OK
 144                     if (pcmk_children[i].respawn) {
 145                         crm_err("%s[%lld] terminated%s", pcmk_children[i].name,
 146                                 (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid),
 147                                 (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
 148                     } else {
 149                         /* orderly shutdown */
 150                         crm_notice("%s[%lld] terminated%s", pcmk_children[i].name,
 151                                    (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid),
 152                                    (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
 153                     }
 154                     pcmk_process_exit(&(pcmk_children[i]));
 155                     continue;
 156                 default:
 157                     crm_exit(CRM_EX_FATAL);
 158                     break;  /* static analysis/noreturn */
 159             }
 160         }
 161         /* at least one of the processes found at startup
 162          * is still going, so keep this recurring timer around */
 163         keep_tracking = TRUE;
 164     }
 165 
 166     global_keep_tracking = keep_tracking;
 167     return keep_tracking;
 168 }
 169 
 170 static gboolean
 171 escalate_shutdown(gpointer data)
     /*  */
 172 {
 173     pcmk_child_t *child = data;
 174 
 175     if (child->pid == PCMK__SPECIAL_PID) {
 176         pcmk_process_exit(child);
 177 
 178     } else if (child->pid != 0) {
 179         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
 180         crm_err("Child %s not terminating in a timely manner, forcing", child->name);
 181         stop_child(child, SIGSEGV);
 182     }
 183     return FALSE;
 184 }
 185 
 186 static void
 187 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
     /*  */
 188 {
 189     pcmk_child_t *child = mainloop_child_userdata(p);
 190     const char *name = mainloop_child_name(p);
 191 
 192     if (signo) {
 193         do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
 194                    "%s[%d] terminated with signal %d (%s)%s",
 195                    name, pid, signo, strsignal(signo),
 196                    (core? " and dumped core" : ""));
 197 
 198     } else {
 199         switch(exitcode) {
 200             case CRM_EX_OK:
 201                 crm_info("%s[%d] exited with status %d (%s)",
 202                          name, pid, exitcode, crm_exit_str(exitcode));
 203                 break;
 204 
 205             case CRM_EX_FATAL:
 206                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
 207                          name, pid);
 208                 child->respawn = false;
 209                 fatal_error = TRUE;
 210                 pcmk_shutdown(SIGTERM);
 211                 break;
 212 
 213             case CRM_EX_PANIC:
 214                 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
 215                 child->respawn = false;
 216                 fatal_error = TRUE;
 217                 pcmk__panic(__func__);
 218                 pcmk_shutdown(SIGTERM);
 219                 break;
 220 
 221             default:
 222                 crm_err("%s[%d] exited with status %d (%s)",
 223                         name, pid, exitcode, crm_exit_str(exitcode));
 224                 break;
 225         }
 226     }
 227 
 228     pcmk_process_exit(child);
 229 }
 230 
 231 static void
 232 pcmk_process_exit(pcmk_child_t * child)
     /*  */
 233 {
 234     child->pid = 0;
 235     child->active_before_startup = false;
 236 
 237     child->respawn_count += 1;
 238     if (child->respawn_count > MAX_RESPAWN) {
 239         crm_err("Child respawn count exceeded by %s", child->name);
 240         child->respawn = false;
 241     }
 242 
 243     if (shutdown_trigger) {
 244         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
 245         mainloop_set_trigger(shutdown_trigger);
 246 
 247     } else if (!child->respawn) {
 248         /* nothing to do */
 249 
 250     } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
 251         crm_err("Rebooting system because of %s", child->name);
 252         pcmk__panic(__func__);
 253 
 254     } else if (child_liveness(child) == pcmk_rc_ok) {
 255         crm_warn("One-off suppressing strict respawning of a child process %s,"
 256                  " appears alright per %s IPC end-point",
 257                  child->name, child->endpoint);
 258         /* need to monitor how it evolves, and start new process if badly */
 259         child->active_before_startup = true;
 260         if (!global_keep_tracking) {
 261             global_keep_tracking = true;
 262             g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
 263                                   check_active_before_startup_processes, NULL);
 264         }
 265 
 266     } else {
 267         if (child->needs_cluster && !pcmkd_cluster_connected()) {
 268             crm_notice("Skipping cluster-based subdaemon %s until cluster returns",
 269                        child->name);
 270             child->needs_retry = true;
 271             return;
 272         }
 273 
 274         crm_notice("Respawning failed child process: %s", child->name);
 275         start_child(child);
 276     }
 277 }
 278 
 279 static gboolean
 280 pcmk_shutdown_worker(gpointer user_data)
     /*  */
 281 {
 282     static int phase = PCMK__NELEM(pcmk_children) - 1;
 283     static time_t next_log = 0;
 284 
 285     if (phase == PCMK__NELEM(pcmk_children) - 1) {
 286         crm_notice("Shutting down Pacemaker");
 287         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
 288     }
 289 
 290     for (; phase >= 0; phase--) {
 291         pcmk_child_t *child = &(pcmk_children[phase]);
 292 
 293         if (child->pid != 0) {
 294             time_t now = time(NULL);
 295 
 296             if (child->respawn) {
 297                 if (child->pid == PCMK__SPECIAL_PID) {
 298                     crm_warn("The process behind %s IPC cannot be"
 299                              " terminated, so either wait the graceful"
 300                              " period of %ld s for its native termination"
 301                              " if it vitally depends on some other daemons"
 302                              " going down in a controlled way already,"
 303                              " or locate and kill the correct %s process"
 304                              " on your own; set PCMK_fail_fast=1 to avoid"
 305                              " this altogether next time around",
 306                              child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
 307                              child->command);
 308                 }
 309                 next_log = now + 30;
 310                 child->respawn = false;
 311                 stop_child(child, SIGTERM);
 312                 if (phase < PCMK_CHILD_CONTROLD) {
 313                     g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
 314                                   escalate_shutdown, child);
 315                 }
 316 
 317             } else if (now >= next_log) {
 318                 next_log = now + 30;
 319                 crm_notice("Still waiting for %s to terminate "
 320                            CRM_XS " pid=%lld",
 321                            child->name, (long long) child->pid);
 322             }
 323             return TRUE;
 324         }
 325 
 326         /* cleanup */
 327         crm_debug("%s confirmed stopped", child->name);
 328         child->pid = 0;
 329     }
 330 
 331     crm_notice("Shutdown complete");
 332     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
 333     if (!fatal_error && running_with_sbd &&
 334         pcmk__get_sbd_sync_resource_startup() &&
 335         !shutdown_complete_state_reported_client_closed) {
 336         crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
 337         return TRUE;
 338     }
 339 
 340     {
 341         const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY);
 342         if(delay) {
 343             sync();
 344             pcmk__sleep_ms(crm_get_msec(delay));
 345         }
 346     }
 347 
 348     g_main_loop_quit(mainloop);
 349 
 350     if (fatal_error) {
 351         crm_notice("Shutting down and staying down after fatal error");
 352 #ifdef SUPPORT_COROSYNC
 353         pcmkd_shutdown_corosync();
 354 #endif
 355         crm_exit(CRM_EX_FATAL);
 356     }
 357 
 358     return TRUE;
 359 }
 360 
 361 /* TODO once libqb is taught to juggle with IPC end-points carried over as
 362         bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
 363         it shall hand over these descriptors here if/once they are successfully
 364         pre-opened in (presumably) child_liveness(), to avoid any remaining
 365         room for races */
 366  // \return Standard Pacemaker return code
 367 static int
 368 start_child(pcmk_child_t * child)
     /*  */
 369 {
 370     uid_t uid = 0;
 371     gid_t gid = 0;
 372     gboolean use_valgrind = FALSE;
 373     gboolean use_callgrind = FALSE;
 374     const char *env_valgrind = getenv("PCMK_valgrind_enabled");
 375     const char *env_callgrind = getenv("PCMK_callgrind_enabled");
 376 
 377     child->active_before_startup = false;
 378 
 379     if (child->command == NULL) {
 380         crm_info("Nothing to do for child \"%s\"", child->name);
 381         return pcmk_rc_ok;
 382     }
 383 
 384     if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
 385         use_callgrind = TRUE;
 386         use_valgrind = TRUE;
 387 
 388     } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
 389         use_callgrind = TRUE;
 390         use_valgrind = TRUE;
 391 
 392     } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
 393         use_valgrind = TRUE;
 394 
 395     } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
 396         use_valgrind = TRUE;
 397     }
 398 
 399     if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
 400         crm_warn("Cannot enable valgrind for %s:"
 401                  " The location of the valgrind binary is unknown", child->name);
 402         use_valgrind = FALSE;
 403     }
 404 
 405     if (child->uid) {
 406         if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
 407             crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
 408             return EACCES;
 409         }
 410         crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
 411     }
 412 
 413     child->pid = fork();
 414     CRM_ASSERT(child->pid != -1);
 415 
 416     if (child->pid > 0) {
 417         /* parent */
 418         mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
 419 
 420         crm_info("Forked child %lld for process %s%s",
 421                  (long long) child->pid, child->name,
 422                  use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
 423         return pcmk_rc_ok;
 424 
 425     } else {
 426         /* Start a new session */
 427         (void)setsid();
 428 
 429         /* Setup the two alternate arg arrays */
 430         opts_vgrind[0] = strdup(VALGRIND_BIN);
 431         if (use_callgrind) {
 432             opts_vgrind[1] = strdup("--tool=callgrind");
 433             opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
 434             opts_vgrind[3] = strdup(child->command);
 435             opts_vgrind[4] = NULL;
 436         } else {
 437             opts_vgrind[1] = strdup(child->command);
 438             opts_vgrind[2] = NULL;
 439             opts_vgrind[3] = NULL;
 440             opts_vgrind[4] = NULL;
 441         }
 442         opts_default[0] = strdup(child->command);
 443 
 444         if(gid) {
 445             // Whether we need root group access to talk to cluster layer
 446             bool need_root_group = TRUE;
 447 
 448             if (is_corosync_cluster()) {
 449                 /* Corosync clusters can drop root group access, because we set
 450                  * uidgid.gid.${gid}=1 via CMAP, which allows these processes to
 451                  * connect to corosync.
 452                  */
 453                 need_root_group = FALSE;
 454             }
 455 
 456             // Drop root group access if not needed
 457             if (!need_root_group && (setgid(gid) < 0)) {
 458                 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
 459             }
 460 
 461             /* Initialize supplementary groups to only those always granted to
 462              * the user, plus haclient (so we can access IPC).
 463              */
 464             if (initgroups(child->uid, gid) < 0) {
 465                 crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
 466             }
 467         }
 468 
 469         if (uid && setuid(uid) < 0) {
 470             crm_warn("Could not set user to %s (id %d): %s",
 471                      child->uid, uid, strerror(errno));
 472         }
 473 
 474         pcmk__close_fds_in_child(true);
 475 
 476         pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
 477         pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
 478         pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
 479 
 480         if (use_valgrind) {
 481             (void)execvp(VALGRIND_BIN, opts_vgrind);
 482         } else {
 483             (void)execvp(child->command, opts_default);
 484         }
 485         crm_crit("Could not execute %s: %s", child->command, strerror(errno));
 486         crm_exit(CRM_EX_FATAL);
 487     }
 488     return pcmk_rc_ok;          /* never reached */
 489 }
 490 
 491 /*!
 492  * \internal
 493  * \brief Check the liveness of the child based on IPC name and PID if tracked
 494  *
 495  * \param[inout] child  Child tracked data
 496  *
 497  * \return Standard Pacemaker return code
 498  *
 499  * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 500  *       indicating that no trace of IPC liveness was detected,
 501  *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
 502  *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
 503  *       the child is up by PID but not IPC end-point (possibly starting).
 504  * \note This function doesn't modify any of \p child members but \c pid,
 505  *       and is not actively toying with processes as such but invoking
 506  *       \c stop_child in one particular case (there's for some reason
 507  *       a different authentic holder of the IPC end-point).
 508  */
 509 static int
 510 child_liveness(pcmk_child_t *child)
     /*  */
 511 {
 512     uid_t cl_uid = 0;
 513     gid_t cl_gid = 0;
 514     const uid_t root_uid = 0;
 515     const gid_t root_gid = 0;
 516     const uid_t *ref_uid;
 517     const gid_t *ref_gid;
 518     int rc = pcmk_rc_ipc_unresponsive;
 519     pid_t ipc_pid = 0;
 520 
 521     if (child->endpoint == NULL
 522             && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
 523         crm_err("Cannot track child %s for missing both API end-point and PID",
 524                 child->name);
 525         rc = EINVAL; // Misuse of function when child is not trackable
 526 
 527     } else if (child->endpoint != NULL) {
 528         int legacy_rc = pcmk_ok;
 529 
 530         if (child->uid == NULL) {
 531             ref_uid = &root_uid;
 532             ref_gid = &root_gid;
 533         } else {
 534             ref_uid = &cl_uid;
 535             ref_gid = &cl_gid;
 536             legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
 537         }
 538 
 539         if (legacy_rc < 0) {
 540             rc = pcmk_legacy2rc(legacy_rc);
 541             crm_err("Could not find user and group IDs for user %s: %s "
 542                     CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
 543         } else {
 544             rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
 545                                                        *ref_uid, *ref_gid,
 546                                                        &ipc_pid);
 547             if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
 548                 if (child->pid <= 0) {
 549                     /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
 550                      * initializes a new child. If rc is
 551                      * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
 552                      * investigate further.
 553                      */
 554                     child->pid = ipc_pid;
 555                 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
 556                     /* An unexpected (but authorized) process is responding to
 557                      * IPC. Investigate further.
 558                      */
 559                     rc = pcmk_rc_ipc_unresponsive;
 560                 }
 561             }
 562         }
 563     }
 564 
 565     if (rc == pcmk_rc_ipc_unresponsive) {
 566         /* If we get here, a child without IPC is being tracked, no IPC liveness
 567          * has been detected, or IPC liveness has been detected with an
 568          * unexpected (but authorized) process. This is safe on FreeBSD since
 569          * the only change possible from a proper child's PID into "special" PID
 570          * of 1 behind more loosely related process.
 571          */
 572         int ret = pcmk__pid_active(child->pid, child->name);
 573 
 574         if (ipc_pid && ((ret != pcmk_rc_ok)
 575                         || ipc_pid == PCMK__SPECIAL_PID
 576                         || (pcmk__pid_active(ipc_pid,
 577                                              child->name) == pcmk_rc_ok))) {
 578             /* An unexpected (but authorized) process was detected at the IPC
 579              * endpoint, and either it is active, or the child we're tracking is
 580              * not.
 581              */
 582 
 583             if (ret == pcmk_rc_ok) {
 584                 /* The child we're tracking is active. Kill it, and adopt the
 585                  * detected process. This assumes that our children don't fork
 586                  * (thus getting a different PID owning the IPC), but rather the
 587                  * tracking got out of sync because of some means external to
 588                  * Pacemaker, and adopting the detected process is better than
 589                  * killing it and possibly having to spawn a new child.
 590                  */
 591                 /* not possessing IPC, afterall (what about corosync CPG?) */
 592                 stop_child(child, SIGKILL);
 593             }
 594             rc = pcmk_rc_ok;
 595             child->pid = ipc_pid;
 596         } else if (ret == pcmk_rc_ok) {
 597             // Our tracked child's PID was found active, but not its IPC
 598             rc = pcmk_rc_ipc_pid_only;
 599         } else if ((child->pid == 0) && (ret == EINVAL)) {
 600             // FreeBSD can return EINVAL
 601             rc = pcmk_rc_ipc_unresponsive;
 602         } else {
 603             switch (ret) {
 604                 case EACCES:
 605                     rc = pcmk_rc_ipc_unauthorized;
 606                     break;
 607                 case ESRCH:
 608                     rc = pcmk_rc_ipc_unresponsive;
 609                     break;
 610                 default:
 611                     rc = ret;
 612                     break;
 613             }
 614         }
 615     }
 616     return rc;
 617 }
 618 
 619 /*!
 620  * \internal
 621  * \brief Initial one-off check of the pre-existing "child" processes
 622  *
 623  * With "child" process, we mean the subdaemon that defines an API end-point
 624  * (all of them do as of the comment) -- the possible complement is skipped
 625  * as it is deemed it has no such shared resources to cause conflicts about,
 626  * hence it can presumably be started anew without hesitation.
 627  * If that won't hold true in the future, the concept of a shared resource
 628  * will have to be generalized beyond the API end-point.
 629  *
 630  * For boundary cases that the "child" is still starting (IPC end-point is yet
 631  * to be witnessed), or more rarely (practically FreeBSD only), when there's
 632  * a pre-existing "untrackable" authentic process, we give the situation some
 633  * time to possibly unfold in the right direction, meaning that said socket
 634  * will appear or the unattainable process will disappear per the observable
 635  * IPC, respectively.
 636  *
 637  * \return Standard Pacemaker return code
 638  *
 639  * \note Since this gets run at the very start, \c respawn_count fields
 640  *       for particular children get temporarily overloaded with "rounds
 641  *       of waiting" tracking, restored once we are about to finish with
 642  *       success (i.e. returning value >=0) and will remain unrestored
 643  *       otherwise.  One way to suppress liveness detection logic for
 644  *       particular child is to set the said value to a negative number.
 645  */
 646 #define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
 647 int
 648 find_and_track_existing_processes(void)
     /*  */
 649 {
 650     bool tracking = false;
 651     bool wait_in_progress;
 652     int rc;
 653     size_t i, rounds;
 654 
 655     for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
 656         wait_in_progress = false;
 657         for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 658 
 659             if ((pcmk_children[i].endpoint == NULL)
 660                 || (pcmk_children[i].respawn_count < 0)) {
 661                 continue;
 662             }
 663 
 664             rc = child_liveness(&pcmk_children[i]);
 665             if (rc == pcmk_rc_ipc_unresponsive) {
 666                 /* As a speculation, don't give up if there are more rounds to
 667                  * come for other reasons, but don't artificially wait just
 668                  * because of this, since we would preferably start ASAP.
 669                  */
 670                 continue;
 671             }
 672 
 673             pcmk_children[i].respawn_count = rounds;
 674             switch (rc) {
 675                 case pcmk_rc_ok:
 676                     if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
 677                         if (crm_is_true(getenv("PCMK_fail_fast"))) {
 678                             crm_crit("Cannot reliably track pre-existing"
 679                                      " authentic process behind %s IPC on this"
 680                                      " platform and PCMK_fail_fast requested",
 681                                      pcmk_children[i].endpoint);
 682                             return EOPNOTSUPP;
 683                         } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 684                             crm_notice("Assuming pre-existing authentic, though"
 685                                        " on this platform untrackable, process"
 686                                        " behind %s IPC is stable (was in %d"
 687                                        " previous samples) so rather than"
 688                                        " bailing out (PCMK_fail_fast not"
 689                                        " requested), we just switch to a less"
 690                                        " optimal IPC liveness monitoring"
 691                                        " (not very suitable for heavy load)",
 692                                        pcmk_children[i].name, WAIT_TRIES - 1);
 693                             crm_warn("The process behind %s IPC cannot be"
 694                                      " terminated, so the overall shutdown"
 695                                      " will get delayed implicitly (%ld s),"
 696                                      " which serves as a graceful period for"
 697                                      " its native termination if it vitally"
 698                                      " depends on some other daemons going"
 699                                      " down in a controlled way already",
 700                                      pcmk_children[i].name,
 701                                      (long) SHUTDOWN_ESCALATION_PERIOD);
 702                         } else {
 703                             wait_in_progress = true;
 704                             crm_warn("Cannot reliably track pre-existing"
 705                                      " authentic process behind %s IPC on this"
 706                                      " platform, can still disappear in %d"
 707                                      " attempt(s)", pcmk_children[i].endpoint,
 708                                      WAIT_TRIES - pcmk_children[i].respawn_count);
 709                             continue;
 710                         }
 711                     }
 712                     crm_notice("Tracking existing %s process (pid=%lld)",
 713                                pcmk_children[i].name,
 714                                (long long) PCMK__SPECIAL_PID_AS_0(
 715                                                pcmk_children[i].pid));
 716                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
 717                     pcmk_children[i].active_before_startup = true;
 718                     tracking = true;
 719                     break;
 720                 case pcmk_rc_ipc_pid_only:
 721                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
 722                         crm_crit("%s IPC end-point for existing authentic"
 723                                  " process %lld did not (re)appear",
 724                                  pcmk_children[i].endpoint,
 725                                  (long long) PCMK__SPECIAL_PID_AS_0(
 726                                                  pcmk_children[i].pid));
 727                         return rc;
 728                     }
 729                     wait_in_progress = true;
 730                     crm_warn("Cannot find %s IPC end-point for existing"
 731                              " authentic process %lld, can still (re)appear"
 732                              " in %d attempts (?)",
 733                              pcmk_children[i].endpoint,
 734                              (long long) PCMK__SPECIAL_PID_AS_0(
 735                                              pcmk_children[i].pid),
 736                              WAIT_TRIES - pcmk_children[i].respawn_count);
 737                     continue;
 738                 default:
 739                     crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
 740                              pcmk_children[i].name, pcmk_rc_str(rc), rc);
 741                     return rc;
 742             }
 743         }
 744         if (!wait_in_progress) {
 745             break;
 746         }
 747         pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
 748     }
 749     for (i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 750         pcmk_children[i].respawn_count = 0;  /* restore pristine state */
 751     }
 752 
 753     if (tracking) {
 754         g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
 755                               check_active_before_startup_processes, NULL);
 756     }
 757     return pcmk_rc_ok;
 758 }
 759 
 760 gboolean
 761 init_children_processes(void *user_data)
     /*  */
 762 {
 763     /* start any children that have not been detected */
 764     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 765         if (pcmk_children[i].pid != 0) {
 766             /* we are already tracking it */
 767             continue;
 768         }
 769 
 770         start_child(&(pcmk_children[i]));
 771     }
 772 
 773     /* From this point on, any daemons being started will be due to
 774      * respawning rather than node start.
 775      *
 776      * This may be useful for the daemons to know
 777      */
 778     setenv("PCMK_respawned", "true", 1);
 779     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
 780     return TRUE;
 781 }
 782 
 783 void
 784 pcmk_shutdown(int nsig)
     /*  */
 785 {
 786     if (shutdown_trigger == NULL) {
 787         shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
 788     }
 789     mainloop_set_trigger(shutdown_trigger);
 790 }
 791 
 792 void
 793 restart_cluster_subdaemons(void)
     /*  */
 794 {
 795     for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
 796         if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) {
 797             continue;
 798         }
 799 
 800         crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name);
 801         if (start_child(&pcmk_children[i])) {
 802             pcmk_children[i].needs_retry = false;
 803         }
 804     }
 805 }
 806 
 807 static gboolean
 808 stop_child(pcmk_child_t * child, int signal)
     /*  */
 809 {
 810     if (signal == 0) {
 811         signal = SIGTERM;
 812     }
 813 
 814     /* why to skip PID of 1?
 815        - FreeBSD ~ how untrackable process behind IPC is masqueraded as
 816        - elsewhere: how "init" task is designated; in particular, in systemd
 817          arrangement of socket-based activation, this is pretty real */
 818     if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
 819         crm_debug("Nothing to do for child \"%s\" (process %lld)",
 820                   child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
 821         return TRUE;
 822     }
 823 
 824     if (child->pid <= 0) {
 825         crm_trace("Client %s not running", child->name);
 826         return TRUE;
 827     }
 828 
 829     errno = 0;
 830     if (kill(child->pid, signal) == 0) {
 831         crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
 832                    child->name, signal, (long long) child->pid);
 833 
 834     } else {
 835         crm_err("Could not stop %s (process %lld) with signal %d: %s",
 836                 child->name, (long long) child->pid, signal, strerror(errno));
 837     }
 838 
 839     return TRUE;
 840 }
 841
/* */
root/daemons/pacemakerd/pcmkd_subdaemons.c

DEFINITIONS