root/daemons/pacemakerd/pacemakerd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. pcmk_process_exit
  2. pcmk_child_exit
  3. stop_child
  4. start_child
  5. escalate_shutdown
  6. pcmk_shutdown_worker
  7. pcmk_ignore
  8. pcmk_sigquit
  9. pcmk_shutdown
  10. pcmk_ipc_accept
  11. pcmk_handle_ping_request
  12. pcmk_ipc_dispatch
  13. pcmk_ipc_closed
  14. pcmk_ipc_destroy
  15. mcp_chown
  16. create_pcmk_dirs
  17. child_liveness
  18. check_active_before_startup_processes
  19. find_and_track_existing_processes
  20. init_children_processes
  21. remove_core_file_limit
  22. request_shutdown
  23. main

   1 /*
   2  * Copyright 2010-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #include <pwd.h>
  14 #include <grp.h>
  15 #include <errno.h>
  16 #include <stdio.h>
  17 #include <stdbool.h>
  18 #include <sys/stat.h>
  19 #include <sys/types.h>
  20 #include <sys/time.h>
  21 #include <sys/resource.h>
  22 #include <sys/reboot.h>
  23 
  24 #include <crm/crm.h>  /* indirectly: CRM_EX_* */
  25 #include <crm/cib/internal.h>  /* cib_channel_ro */
  26 #include <crm/msg_xml.h>
  27 #include <crm/common/ipc_internal.h>
  28 #include <crm/common/mainloop.h>
  29 #include <crm/cluster/internal.h>
  30 #include <crm/cluster.h>
  31 
  32 #include <dirent.h>
  33 #include <ctype.h>
  34 
  35 static gboolean fatal_error = FALSE;  /* set by pcmk_child_exit() when a child exits with CRM_EX_FATAL or CRM_EX_PANIC */
  36 static GMainLoop *mainloop = NULL;  /* main loop that pcmk_shutdown_worker() quits once shutdown is complete */
  37 static bool global_keep_tracking = false;  /* true once the periodic liveness-check timer has been scheduled */
  38 
  39 #define PCMK_PROCESS_CHECK_INTERVAL 5  /* seconds; passed to g_timeout_add_seconds() */
  40 
  41 static crm_trigger_t *shutdown_trigger = NULL;  /* created lazily by pcmk_shutdown(); non-NULL means shutdown was requested */
  42 static crm_trigger_t *startup_trigger = NULL;  /* fired by the ping handler when SBD pings us in the wait-for-ping state */
  43 static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
  44 
  45 /* State we report when asked via pacemakerd-api status-ping */
  46 static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
  47 static gboolean running_with_sbd = FALSE; /* local copy */
  48 /* When contacted via pacemakerd-api by a client having sbd in
  49  * the name, we assume it is the sbd daemon wanting to know
  50  * whether pacemakerd shut down gracefully.
  51  * Thus, when everything is shut down properly, pacemakerd
  52  * waits until it has reported the graceful completion of
  53  * shutdown to sbd; only once the sbd client closes the
  54  * connection can we assume that the report has arrived
  55  * properly, so that pacemakerd can finally exit.
  56  * The following two variables are used to track that handshake.
  57  */
  58 static unsigned int shutdown_complete_state_reported_to = 0;  /* PID of the sbd client that was told "shutdown complete" */
  59 static gboolean shutdown_complete_state_reported_client_closed = FALSE;  /* set when that client's connection closes */
  60 
  61 typedef struct pcmk_child_s {  /* bookkeeping for one subdaemon supervised by pacemakerd */
  62     pid_t pid;  /* tracked PID; 0 when not running, PCMK__SPECIAL_PID when only reachable via IPC */
  63     int start_seq;  /* shutdown walks these phases from highest to lowest; values < 1 are never stopped */
  64     int respawn_count;  /* incremented on every exit; beyond MAX_RESPAWN respawning is disabled */
  65     gboolean respawn;  /* whether an exited child should be started again */
  66     const char *name;  /* daemon name used in log messages and valgrind matching */
  67     const char *uid;  /* user name to run as, or NULL to keep the current user */
  68     const char *command;  /* executable path; NULL means there is nothing to start or stop */
  69     const char *endpoint;  /* IPC server name */
  70 
  71     gboolean active_before_startup;  /* TRUE while the child is judged alive via IPC rather than forked by us */
  72 } pcmk_child_t;
  73 
  74 /* Index into the array below */
  75 #define PCMK_CHILD_CONTROLD  3
  76 
  77 static pcmk_child_t pcmk_children[] = {
  78     {
  79         0, 0, 0, FALSE, "none", NULL, NULL, NULL
  80     },
  81     {
  82         0, 3, 0, TRUE,  "pacemaker-execd", NULL,
  83         CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD
  84     },
  85     {
  86         0, 1, 0, TRUE,  "pacemaker-based", CRM_DAEMON_USER,
  87         CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO
  88     },
  89     {
  90         0, 6, 0, TRUE, "pacemaker-controld", CRM_DAEMON_USER,
  91         CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD
  92     },
  93     {
  94         0, 4, 0, TRUE, "pacemaker-attrd", CRM_DAEMON_USER,
  95         CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD
  96     },
  97     {
  98         0, 5, 0, TRUE, "pacemaker-schedulerd", CRM_DAEMON_USER,
  99         CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE
 100     },
 101     {
 102         0, 2, 0, TRUE, "pacemaker-fenced", NULL,
 103         CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng"
 104     },
 105 };
 106 
 107 static gboolean check_active_before_startup_processes(gpointer user_data);
 108 static int child_liveness(pcmk_child_t *child);
 109 static gboolean start_child(pcmk_child_t * child);
 110 
 111 static void
 112 pcmk_process_exit(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114     child->pid = 0;
 115     child->active_before_startup = FALSE;
 116 
 117     child->respawn_count += 1;
 118     if (child->respawn_count > MAX_RESPAWN) {
 119         crm_err("Child respawn count exceeded by %s", child->name);
 120         child->respawn = FALSE;
 121     }
 122 
 123     if (shutdown_trigger) {
 124         /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
 125         mainloop_set_trigger(shutdown_trigger);
 126 
 127     } else if (!child->respawn) {
 128         /* nothing to do */
 129 
 130     } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
 131         crm_err("Rebooting system because of %s", child->name);
 132         pcmk__panic(__func__);
 133 
 134     } else if (child_liveness(child) == pcmk_rc_ok) {
 135         crm_warn("One-off suppressing strict respawning of a child process %s,"
 136                  " appears alright per %s IPC end-point",
 137                  child->name, child->endpoint);
 138         /* need to monitor how it evolves, and start new process if badly */
 139         child->active_before_startup = TRUE;
 140         if (!global_keep_tracking) {
 141             global_keep_tracking = true;
 142             g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
 143                                   check_active_before_startup_processes, NULL);
 144         }
 145 
 146     } else {
 147         crm_notice("Respawning failed child process: %s", child->name);
 148         start_child(child);
 149     }
 150 }
 151 
 152 static void
 153 pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155     pcmk_child_t *child = mainloop_child_userdata(p);
 156     const char *name = mainloop_child_name(p);
 157 
 158     if (signo) {
 159         do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR),
 160                    "%s[%d] terminated with signal %d (core=%d)",
 161                    name, pid, signo, core);
 162 
 163     } else {
 164         switch(exitcode) {
 165             case CRM_EX_OK:
 166                 crm_info("%s[%d] exited with status %d (%s)",
 167                          name, pid, exitcode, crm_exit_str(exitcode));
 168                 break;
 169 
 170             case CRM_EX_FATAL:
 171                 crm_warn("Shutting cluster down because %s[%d] had fatal failure",
 172                          name, pid);
 173                 child->respawn = FALSE;
 174                 fatal_error = TRUE;
 175                 pcmk_shutdown(SIGTERM);
 176                 break;
 177 
 178             case CRM_EX_PANIC:
 179                 crm_emerg("%s[%d] instructed the machine to reset", name, pid);
 180                 child->respawn = FALSE;
 181                 fatal_error = TRUE;
 182                 pcmk__panic(__func__);
 183                 pcmk_shutdown(SIGTERM);
 184                 break;
 185 
 186             default:
 187                 crm_err("%s[%d] exited with status %d (%s)",
 188                         name, pid, exitcode, crm_exit_str(exitcode));
 189                 break;
 190         }
 191     }
 192 
 193     pcmk_process_exit(child);
 194 }
 195 
 196 static gboolean
 197 stop_child(pcmk_child_t * child, int signal)
     /* [previous][next][first][last][top][bottom][index][help] */
 198 {
 199     if (signal == 0) {
 200         signal = SIGTERM;
 201     }
 202 
 203     /* why to skip PID of 1?
 204        - FreeBSD ~ how untrackable process behind IPC is masqueraded as
 205        - elsewhere: how "init" task is designated; in particular, in systemd
 206          arrangement of socket-based activation, this is pretty real */
 207     if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
 208         crm_debug("Nothing to do for child \"%s\" (process %lld)",
 209                   child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
 210         return TRUE;
 211     }
 212 
 213     if (child->pid <= 0) {
 214         crm_trace("Client %s not running", child->name);
 215         return TRUE;
 216     }
 217 
 218     errno = 0;
 219     if (kill(child->pid, signal) == 0) {
 220         crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld",
 221                    child->name, signal, (long long) child->pid);
 222 
 223     } else {
 224         crm_err("Could not stop %s (process %lld) with signal %d: %s",
 225                 child->name, (long long) child->pid, signal, strerror(errno));
 226     }
 227 
 228     return TRUE;
 229 }
 230 
 231 static char *opts_default[] = { NULL, NULL };
 232 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
 233 
 234 /* TODO once libqb is taught to juggle with IPC end-points carried over as
 235         bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
 236         it shall hand over these descriptors here if/once they are successfully
 237         pre-opened in (presumably) child_liveness(), to avoid any remaining
 238         room for races */
 239 static gboolean
 240 start_child(pcmk_child_t * child)
     /* [previous][next][first][last][top][bottom][index][help] */
 241 {
 242     uid_t uid = 0;
 243     gid_t gid = 0;
 244     gboolean use_valgrind = FALSE;
 245     gboolean use_callgrind = FALSE;
 246     const char *env_valgrind = getenv("PCMK_valgrind_enabled");
 247     const char *env_callgrind = getenv("PCMK_callgrind_enabled");
 248 
 249     child->active_before_startup = FALSE;
 250 
 251     if (child->command == NULL) {
 252         crm_info("Nothing to do for child \"%s\"", child->name);
 253         return TRUE;
 254     }
 255 
 256     if (env_callgrind != NULL && crm_is_true(env_callgrind)) {
 257         use_callgrind = TRUE;
 258         use_valgrind = TRUE;
 259 
 260     } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) {
 261         use_callgrind = TRUE;
 262         use_valgrind = TRUE;
 263 
 264     } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) {
 265         use_valgrind = TRUE;
 266 
 267     } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) {
 268         use_valgrind = TRUE;
 269     }
 270 
 271     if (use_valgrind && strlen(VALGRIND_BIN) == 0) {
 272         crm_warn("Cannot enable valgrind for %s:"
 273                  " The location of the valgrind binary is unknown", child->name);
 274         use_valgrind = FALSE;
 275     }
 276 
 277     if (child->uid) {
 278         if (crm_user_lookup(child->uid, &uid, &gid) < 0) {
 279             crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
 280             return FALSE;
 281         }
 282         crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
 283     }
 284 
 285     child->pid = fork();
 286     CRM_ASSERT(child->pid != -1);
 287 
 288     if (child->pid > 0) {
 289         /* parent */
 290         mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);
 291 
 292         crm_info("Forked child %lld for process %s%s",
 293                  (long long) child->pid, child->name,
 294                  use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
 295         return TRUE;
 296 
 297     } else {
 298         /* Start a new session */
 299         (void)setsid();
 300 
 301         /* Setup the two alternate arg arrays */
 302         opts_vgrind[0] = strdup(VALGRIND_BIN);
 303         if (use_callgrind) {
 304             opts_vgrind[1] = strdup("--tool=callgrind");
 305             opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
 306             opts_vgrind[3] = strdup(child->command);
 307             opts_vgrind[4] = NULL;
 308         } else {
 309             opts_vgrind[1] = strdup(child->command);
 310             opts_vgrind[2] = NULL;
 311             opts_vgrind[3] = NULL;
 312             opts_vgrind[4] = NULL;
 313         }
 314         opts_default[0] = strdup(child->command);
 315 
 316         if(gid) {
 317             // Whether we need root group access to talk to cluster layer
 318             bool need_root_group = TRUE;
 319 
 320             if (is_corosync_cluster()) {
 321                 /* Corosync clusters can drop root group access, because we set
 322                  * uidgid.gid.${gid}=1 via CMAP, which allows these processes to
 323                  * connect to corosync.
 324                  */
 325                 need_root_group = FALSE;
 326             }
 327 
 328             // Drop root group access if not needed
 329             if (!need_root_group && (setgid(gid) < 0)) {
 330                 crm_warn("Could not set group to %d: %s", gid, strerror(errno));
 331             }
 332 
 333             /* Initialize supplementary groups to only those always granted to
 334              * the user, plus haclient (so we can access IPC).
 335              */
 336             if (initgroups(child->uid, gid) < 0) {
 337                 crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
 338             }
 339         }
 340 
 341         if (uid && setuid(uid) < 0) {
 342             crm_warn("Could not set user to %s (id %d): %s",
 343                      child->uid, uid, strerror(errno));
 344         }
 345 
 346         pcmk__close_fds_in_child(true);
 347 
 348         pcmk__open_devnull(O_RDONLY);   // stdin (fd 0)
 349         pcmk__open_devnull(O_WRONLY);   // stdout (fd 1)
 350         pcmk__open_devnull(O_WRONLY);   // stderr (fd 2)
 351 
 352         if (use_valgrind) {
 353             (void)execvp(VALGRIND_BIN, opts_vgrind);
 354         } else {
 355             (void)execvp(child->command, opts_default);
 356         }
 357         crm_crit("Could not execute %s: %s", child->command, strerror(errno));
 358         crm_exit(CRM_EX_FATAL);
 359     }
 360     return TRUE;                /* never reached */
 361 }
 362 
 363 static gboolean
 364 escalate_shutdown(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366 
 367     pcmk_child_t *child = data;
 368 
 369     if (child->pid == PCMK__SPECIAL_PID) {
 370         pcmk_process_exit(child);
 371 
 372     } else if (child->pid != 0) {
 373         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
 374         crm_err("Child %s not terminating in a timely manner, forcing", child->name);
 375         stop_child(child, SIGSEGV);
 376     }
 377     return FALSE;
 378 }
 379 
 380 #define SHUTDOWN_ESCALATION_PERIOD 180000  /* 3m */
 381 
 382 static gboolean
 383 pcmk_shutdown_worker(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 384 {
 385     static int phase = SIZEOF(pcmk_children);
 386     static time_t next_log = 0;
 387 
 388     int lpc = 0;
 389 
 390     if (phase == SIZEOF(pcmk_children)) {
 391         crm_notice("Shutting down Pacemaker");
 392         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
 393     }
 394 
 395     for (; phase > 0; phase--) {
 396         /* Don't stop anything with start_seq < 1 */
 397 
 398         for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
 399             pcmk_child_t *child = &(pcmk_children[lpc]);
 400 
 401             if (phase != child->start_seq) {
 402                 continue;
 403             }
 404 
 405             if (child->pid != 0) {
 406                 time_t now = time(NULL);
 407 
 408                 if (child->respawn) {
 409                     if (child->pid == PCMK__SPECIAL_PID) {
 410                         crm_warn("The process behind %s IPC cannot be"
 411                                  " terminated, so either wait the graceful"
 412                                  " period of %ld s for its native termination"
 413                                  " if it vitally depends on some other daemons"
 414                                  " going down in a controlled way already,"
 415                                  " or locate and kill the correct %s process"
 416                                  " on your own; set PCMK_fail_fast=1 to avoid"
 417                                  " this altogether next time around",
 418                                  child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
 419                                  child->command);
 420                     }
 421                     next_log = now + 30;
 422                     child->respawn = FALSE;
 423                     stop_child(child, SIGTERM);
 424                     if (phase < pcmk_children[PCMK_CHILD_CONTROLD].start_seq) {
 425                         g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
 426                                       escalate_shutdown, child);
 427                     }
 428 
 429                 } else if (now >= next_log) {
 430                     next_log = now + 30;
 431                     crm_notice("Still waiting for %s to terminate "
 432                                CRM_XS " pid=%lld seq=%d",
 433                                child->name, (long long) child->pid,
 434                                child->start_seq);
 435                 }
 436                 return TRUE;
 437             }
 438 
 439             /* cleanup */
 440             crm_debug("%s confirmed stopped", child->name);
 441             child->pid = 0;
 442         }
 443     }
 444 
 445     crm_notice("Shutdown complete");
 446     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
 447     if (!fatal_error && running_with_sbd &&
 448         pcmk__get_sbd_sync_resource_startup() &&
 449         !shutdown_complete_state_reported_client_closed) {
 450         crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
 451         return TRUE;
 452     }
 453 
 454     {
 455         const char *delay = pcmk__env_option("shutdown_delay");
 456         if(delay) {
 457             sync();
 458             pcmk__sleep_ms(crm_get_msec(delay));
 459         }
 460     }
 461 
 462     g_main_loop_quit(mainloop);
 463 
 464     if (fatal_error) {
 465         crm_notice("Shutting down and staying down after fatal error");
 466 #ifdef SUPPORT_COROSYNC
 467         pcmkd_shutdown_corosync();
 468 #endif
 469         crm_exit(CRM_EX_FATAL);
 470     }
 471 
 472     return TRUE;
 473 }
 474 
 475 static void
 476 pcmk_ignore(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 477 {
 478     crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig);
 479 }
 480 
 481 static void
 482 pcmk_sigquit(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 483 {
 484     pcmk__panic(__func__);
 485 }
 486 
 487 void
 488 pcmk_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 489 {
 490     if (shutdown_trigger == NULL) {
 491         shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL);
 492     }
 493     mainloop_set_trigger(shutdown_trigger);
 494 }
 495 
 496 static int32_t
 497 pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
     /* [previous][next][first][last][top][bottom][index][help] */
 498 {
 499     crm_trace("Connection %p", c);
 500     if (pcmk__new_client(c, uid, gid) == NULL) {
 501         return -EIO;
 502     }
 503     return 0;
 504 }
 505 
 506 static void
 507 pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
     /* [previous][next][first][last][top][bottom][index][help] */
 508 {
 509     const char *value = NULL;
 510     xmlNode *ping = NULL;
 511     xmlNode *reply = NULL;
 512     time_t pinged = time(NULL);
 513     const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
 514 
 515     /* Pinged for status */
 516     crm_trace("Pinged from %s.%s",
 517               crm_str(crm_element_value(msg, F_CRM_ORIGIN)),
 518               from?from:"unknown");
 519     ping = create_xml_node(NULL, XML_CRM_TAG_PING);
 520     value = crm_element_value(msg, F_CRM_SYS_TO);
 521     crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
 522     crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
 523     crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
 524     crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
 525     reply = create_reply(msg, ping);
 526     free_xml(ping);
 527     if (reply) {
 528         if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) !=
 529                 pcmk_rc_ok) {
 530             crm_err("Failed sending ping reply to client %s",
 531                     pcmk__client_name(c));
 532         }
 533         free_xml(reply);
 534     } else {
 535         crm_err("Failed building ping reply for client %s",
 536                 pcmk__client_name(c));
 537     }
 538     /* just proceed state on sbd pinging us */
 539     if (from && strstr(from, "sbd")) {
 540         if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, pcmk__str_none)) {
 541             if (pcmk__get_sbd_sync_resource_startup()) {
 542                 crm_notice("Shutdown-complete-state passed to SBD.");
 543             }
 544             shutdown_complete_state_reported_to = c->pid;
 545         } else if (pcmk__str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, pcmk__str_none)) {
 546             crm_notice("Received startup-trigger from SBD.");
 547             pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
 548             mainloop_set_trigger(startup_trigger);
 549         }
 550     }
 551 }
 552 
 553 /* Exit code means? */
 554 static int32_t
 555 pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
     /* [previous][next][first][last][top][bottom][index][help] */
 556 {
 557     uint32_t id = 0;
 558     uint32_t flags = 0;
 559     const char *task = NULL;
 560     xmlNode *msg = NULL;
 561     pcmk__client_t *c = pcmk__find_client(qbc);
 562 
 563     CRM_CHECK(c != NULL, return 0);
 564 
 565     msg = pcmk__client_data2xml(c, data, &id, &flags);
 566     if (msg == NULL) {
 567         pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_PROTOCOL);
 568         return 0;
 569     }
 570 
 571     task = crm_element_value(msg, F_CRM_TASK);
 572     if (pcmk__str_eq(task, CRM_OP_QUIT, pcmk__str_none)) {
 573         /* Only allow privileged users (i.e. root or hacluster) to shut down
 574          * Pacemaker from the command line (or direct IPC), so that other users
 575          * are forced to go through the CIB and have ACLs applied.
 576          */
 577         bool allowed = pcmk_is_set(c->flags, pcmk__client_privileged);
 578 
 579         if (allowed) {
 580             crm_notice("Shutting down in response to IPC request %s from %s",
 581                        crm_element_value(msg, F_CRM_REFERENCE),
 582                        crm_element_value(msg, F_CRM_ORIGIN));
 583             pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_OK);
 584             pcmk_shutdown(15);
 585         } else {
 586             crm_warn("Ignoring shutdown request from unprivileged client %s",
 587                      pcmk__client_name(c));
 588             pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_INSUFFICIENT_PRIV);
 589         }
 590 
 591     } else if (pcmk__str_eq(task, CRM_OP_RM_NODE_CACHE, pcmk__str_none)) {
 592         crm_trace("Ignoring request from client %s to purge node "
 593                   "because peer cache is not used", pcmk__client_name(c));
 594         pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_OK);
 595 
 596     } else if (pcmk__str_eq(task, CRM_OP_PING, pcmk__str_none)) {
 597         pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_INDETERMINATE);
 598         pcmk_handle_ping_request(c, msg, id);
 599 
 600     } else {
 601         crm_debug("Unrecognized IPC command '%s' from client %s",
 602                   crm_str(task), pcmk__client_name(c));
 603         pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_INVALID_PARAM);
 604     }
 605 
 606     free_xml(msg);
 607     return 0;
 608 }
 609 
 610 /* Error code means? */
 611 static int32_t
 612 pcmk_ipc_closed(qb_ipcs_connection_t * c)
     /* [previous][next][first][last][top][bottom][index][help] */
 613 {
 614     pcmk__client_t *client = pcmk__find_client(c);
 615 
 616     if (client == NULL) {
 617         return 0;
 618     }
 619     crm_trace("Connection %p", c);
 620     if (shutdown_complete_state_reported_to == client->pid) {
 621         shutdown_complete_state_reported_client_closed = TRUE;
 622         if (shutdown_trigger) {
 623             mainloop_set_trigger(shutdown_trigger);
 624         }
 625     }
 626     pcmk__free_client(client);
 627     return 0;
 628 }
 629 
 630 static void
 631 pcmk_ipc_destroy(qb_ipcs_connection_t * c)
     /* [previous][next][first][last][top][bottom][index][help] */
 632 {
 633     crm_trace("Connection %p", c);
 634     pcmk_ipc_closed(c);
 635 }
 636 
  637 struct qb_ipcs_service_handlers mcp_ipc_callbacks = {  /* IPC server callbacks for pacemakerd; has external linkage */
  638     .connection_accept = pcmk_ipc_accept,  /* creates the client object, or refuses with -EIO */
  639     .connection_created = NULL,  /* no callback installed */
  640     .msg_process = pcmk_ipc_dispatch,  /* handles quit, node-cache purge and ping requests */
  641     .connection_closed = pcmk_ipc_closed,  /* frees the client and completes the SBD handshake */
  642     .connection_destroyed = pcmk_ipc_destroy  /* delegates to pcmk_ipc_closed() */
  643 };
 644 
  645 static pcmk__cli_option_t long_options[] = {  // command-line options of pacemakerd
  646     // Field order: long option, argument type, storage, short option, description, flags
  647     {
  648         "help", no_argument, NULL, '?',
  649         "\tThis text", pcmk__option_default
  650     },
  651     {
  652         "version", no_argument, NULL, '$',
  653         "\tVersion information", pcmk__option_default
  654     },
  655     {
  656         "verbose", no_argument, NULL, 'V',
  657         "\tIncrease debug output", pcmk__option_default
  658     },
  659     {
  660         "shutdown", no_argument, NULL, 'S',
  661         "\tInstruct Pacemaker to shutdown on this machine", pcmk__option_default
  662     },
  663     {
  664         "features", no_argument, NULL, 'F',
  665         "\tDisplay full version and list of features Pacemaker was built with",
  666         pcmk__option_default
  667     },
  668     {  // not a real option: its description is a section heading (presumably only shown in help output)
  669         "-spacer-", no_argument, NULL, '-',
  670         "\nAdditional Options:", pcmk__option_default
  671     },
  672     {
  673         "foreground", no_argument, NULL, 'f',
  674         "\t(Ignored) Pacemaker always runs in the foreground",
  675         pcmk__option_default
  676     },
  677     {
  678         "pid-file", required_argument, NULL, 'p',
  679         "\t(Ignored) Daemon pid file location", pcmk__option_default
  680     },
  681     {
  682         "standby", no_argument, NULL, 's',
  683         "\tStart node in standby state", pcmk__option_default
  684     },
  685     { 0, 0, 0, 0 }  // all-zero entry; presumably the end-of-table sentinel
  686 };
 687 
 688 static void
 689 mcp_chown(const char *path, uid_t uid, gid_t gid)
     /* [previous][next][first][last][top][bottom][index][help] */
 690 {
 691     int rc = chown(path, uid, gid);
 692 
 693     if (rc < 0) {
 694         crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s",
 695                  path, CRM_DAEMON_USER, gid, pcmk_strerror(errno));
 696     }
 697 }
 698 
 699 static void
 700 create_pcmk_dirs(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 701 {
 702     uid_t pcmk_uid = 0;
 703     gid_t pcmk_gid = 0;
 704 
 705     const char *dirs[] = {
 706         CRM_PACEMAKER_DIR, // core/blackbox/scheduler/CIB files
 707         CRM_CORE_DIR,      // core files
 708         CRM_BLACKBOX_DIR,  // blackbox dumps
 709         PE_STATE_DIR,      // scheduler inputs
 710         CRM_CONFIG_DIR,    // the Cluster Information Base (CIB)
 711         // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it
 712         NULL
 713     };
 714 
 715     if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) {
 716         crm_err("Cluster user %s does not exist, aborting Pacemaker startup",
 717                 CRM_DAEMON_USER);
 718         crm_exit(CRM_EX_NOUSER);
 719     }
 720 
 721     // Used by some resource agents
 722     if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) {
 723         crm_warn("Could not create directory " CRM_STATE_DIR ": %s",
 724                  pcmk_rc_str(errno));
 725     } else {
 726         mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid);
 727     }
 728 
 729     for (int i = 0; dirs[i] != NULL; ++i) {
 730         int rc = pcmk__build_path(dirs[i], 0750);
 731 
 732         if (rc != pcmk_rc_ok) {
 733             crm_warn("Could not create directory %s: %s",
 734                      dirs[i], pcmk_rc_str(rc));
 735         } else {
 736             mcp_chown(dirs[i], pcmk_uid, pcmk_gid);
 737         }
 738     }
 739 }
 740 
 741 /*!
 742  * \internal
 743  * \brief Check the liveness of the child based on IPC name and PID if tracked
 744  *
 745  * \param[inout] child  Child tracked data
 746  *
 747  * \return Standard Pacemaker return code
 748  *
 749  * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 750  *       indicating that no trace of IPC liveness was detected,
 751  *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by
 752  *       an unauthorized process, and pcmk_rc_ipc_pid_only indicating that
 753  *       the child is up by PID but not IPC end-point (possibly starting).
 754  * \note This function doesn't modify any of \p child members but \c pid,
 755  *       and is not actively toying with processes as such but invoking
 756  *       \c stop_child in one particular case (there's for some reason
 757  *       a different authentic holder of the IPC end-point).
 758  */
 759 static int
 760 child_liveness(pcmk_child_t *child)
     /* [previous][next][first][last][top][bottom][index][help] */
 761 {
 762     uid_t cl_uid = 0;
 763     gid_t cl_gid = 0;
 764     const uid_t root_uid = 0;
 765     const gid_t root_gid = 0;
 766     const uid_t *ref_uid;
 767     const gid_t *ref_gid;
         // Default result; it is kept as-is when the child has no end-point
         // (but a usable PID), which routes it to the PID-based check below
 768     int rc = pcmk_rc_ipc_unresponsive;
         // PID reported by the IPC probe; remains 0 unless the probe sets it
 769     pid_t ipc_pid = 0;
 770 
 771     if (child->endpoint == NULL
 772             && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
 773         crm_err("Cannot track child %s for missing both API end-point and PID",
 774                 child->name);
 775         rc = EINVAL; // Misuse of function when child is not trackable
 776 
 777     } else if (child->endpoint != NULL) {
 778         int legacy_rc = pcmk_ok;
 779 
             /* Choose the credentials the IPC probe is checked against:
              * uid/gid 0 when the child has no user configured, otherwise
              * the IDs obtained from pcmk_daemon_user().
              */
 780         if (child->uid == NULL) {
 781             ref_uid = &root_uid;
 782             ref_gid = &root_gid;
 783         } else {
 784             ref_uid = &cl_uid;
 785             ref_gid = &cl_gid;
 786             legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid);
 787         }
 788 
 789         if (legacy_rc < 0) {
                 // User lookup failed: report it and skip the IPC probe
 790             rc = pcmk_legacy2rc(legacy_rc);
 791             crm_err("Could not find user and group IDs for user %s: %s "
 792                     CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc);
 793         } else {
 794             rc = pcmk__ipc_is_authentic_process_active(child->endpoint,
 795                                                        *ref_uid, *ref_gid,
 796                                                        &ipc_pid);
 797             if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) {
 798                 if (child->pid <= 0) {
 799                     /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this
 800                      * initializes a new child. If rc is
 801                      * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will
 802                      * investigate further.
 803                      */
 804                     child->pid = ipc_pid;
 805                 } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) {
 806                     /* An unexpected (but authorized) process is responding to
 807                      * IPC. Investigate further.
 808                      */
 809                     rc = pcmk_rc_ipc_unresponsive;
 810                 }
 811             }
 812         }
 813     }
 814 
 815     if (rc == pcmk_rc_ipc_unresponsive) {
 816         /* If we get here, a child without IPC is being tracked, no IPC liveness
 817          * has been detected, or IPC liveness has been detected with an
 818          * unexpected (but authorized) process. This is safe on FreeBSD since
 819          * the only change possible from a proper child's PID into "special" PID
 820          * of 1 behind more loosely related process.
 821          */
             // ret: result of the PID-based check of the child we are tracking
 822         int ret = pcmk__pid_active(child->pid, child->name);
 823 
 824         if (ipc_pid && ((ret != pcmk_rc_ok)
 825                         || ipc_pid == PCMK__SPECIAL_PID
 826                         || (pcmk__pid_active(ipc_pid,
 827                                              child->name) == pcmk_rc_ok))) {
 828             /* An unexpected (but authorized) process was detected at the IPC
 829              * endpoint, and either it is active, or the child we're tracking is
 830              * not.
 831              */
 832 
 833             if (ret == pcmk_rc_ok) {
 834                 /* The child we're tracking is active. Kill it, and adopt the
 835                  * detected process. This assumes that our children don't fork
 836                  * (thus getting a different PID owning the IPC), but rather the
 837                  * tracking got out of sync because of some means external to
 838                  * Pacemaker, and adopting the detected process is better than
 839                  * killing it and possibly having to spawn a new child.
 840                  */
 841                 /* not possessing IPC, afterall (what about corosync CPG?) */
 842                 stop_child(child, SIGKILL);
 843             }
                 // From here on, track the process found behind the IPC
 844             rc = pcmk_rc_ok;
 845             child->pid = ipc_pid;
 846         } else if (ret == pcmk_rc_ok) {
 847             // Our tracked child's PID was found active, but not its IPC
 848             rc = pcmk_rc_ipc_pid_only;
 849         } else if ((child->pid == 0) && (ret == EINVAL)) {
 850             // FreeBSD can return EINVAL
 851             rc = pcmk_rc_ipc_unresponsive;
 852         } else {
                 /* Translate the PID check's errno-style result: EACCES and
                  * ESRCH get dedicated return codes, anything else is passed
                  * through to the caller unchanged.
                  */
 853             switch (ret) {
 854                 case EACCES:
 855                     rc = pcmk_rc_ipc_unauthorized;
 856                     break;
 857                 case ESRCH:
 858                     rc = pcmk_rc_ipc_unresponsive;
 859                     break;
 860                 default:
 861                     rc = ret;
 862                     break;
 863             }
 864         }
 865     }
 866     return rc;
 867 }
 868 
 869 static gboolean
 870 check_active_before_startup_processes(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 871 {
 872     int start_seq = 1, lpc = 0;
 873     static int max = SIZEOF(pcmk_children);
 874     gboolean keep_tracking = FALSE;
 875 
 876     for (start_seq = 1; start_seq < max; start_seq++) {
 877         for (lpc = 0; lpc < max; lpc++) {
 878             if (pcmk_children[lpc].active_before_startup == FALSE) {
 879                 /* we are already tracking it as a child process. */
 880                 continue;
 881             } else if (start_seq != pcmk_children[lpc].start_seq) {
 882                 continue;
 883             } else {
 884                 int rc = child_liveness(&pcmk_children[lpc]);
 885 
 886                 switch (rc) {
 887                     case pcmk_rc_ok:
 888                         break;
 889                     case pcmk_rc_ipc_unresponsive:
 890                     case pcmk_rc_ipc_pid_only: // This case: it was previously OK
 891                         if (pcmk_children[lpc].respawn == TRUE) {
 892                             crm_err("%s[%lld] terminated%s", pcmk_children[lpc].name,
 893                                     (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
 894                                     (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
 895                         } else {
 896                             /* orderly shutdown */
 897                             crm_notice("%s[%lld] terminated%s", pcmk_children[lpc].name,
 898                                        (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
 899                                        (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
 900                         }
 901                         pcmk_process_exit(&(pcmk_children[lpc]));
 902                         continue;
 903                     default:
 904                         crm_exit(CRM_EX_FATAL);
 905                         break;  /* static analysis/noreturn */
 906                 }
 907             }
 908             /* at least one of the processes found at startup
 909              * is still going, so keep this recurring timer around */
 910             keep_tracking = TRUE;
 911         }
 912     }
 913 
 914     global_keep_tracking = keep_tracking;
 915     return keep_tracking;
 916 }
 917 
 918 /*!
 919  * \internal
 920  * \brief Initial one-off check of the pre-existing "child" processes
 921  *
 922  * With "child" process, we mean the subdaemon that defines an API end-point
 923  * (all of them do as of this writing) -- the possible complement is skipped
 924  * as it is deemed it has no such shared resources to cause conflicts about,
 925  * hence it can presumably be started anew without hesitation.
 926  * If that won't hold true in the future, the concept of a shared resource
 927  * will have to be generalized beyond the API end-point.
 928  *
 929  * For boundary cases that the "child" is still starting (IPC end-point is yet
 930  * to be witnessed), or more rarely (practically FreeBSD only), when there's
 931  * a pre-existing "untrackable" authentic process, we give the situation some
 932  * time to possibly unfold in the right direction, meaning that said socket
 933  * will appear or the unattainable process will disappear per the observable
 934  * IPC, respectively.
 935  *
 936  * \return Standard Pacemaker return code
      *         (pcmk_rc_ok on success, in which case a recurring liveness
      *         check is scheduled if any process was adopted; EOPNOTSUPP if
      *         an untrackable process was found and PCMK_fail_fast is set;
      *         otherwise the failing result of child_liveness())
 937  *
 938  * \note Since this gets run at the very start, \c respawn_count fields
 939  *       for particular children get temporarily overloaded with "rounds
 940  *       of waiting" tracking, restored once we are about to finish with
 941  *       success (i.e. returning pcmk_rc_ok) and will remain unrestored
 942  *       otherwise.  One way to suppress liveness detection logic for
 943  *       particular child is to set the said value to a negative number.
 944  */
 945 #define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
 946 static int
 947 find_and_track_existing_processes(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 948 {
         // Set once any pre-existing process has been adopted for tracking
 949     bool tracking = false;
         // Set within a round when some child warrants another round
 950     bool wait_in_progress;
 951     int rc;
 952     size_t i, rounds;
 953 
 954     for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
 955         wait_in_progress = false;
 956         for (i = 0; i < SIZEOF(pcmk_children); i++) {
 957 
                 /* Skip children without an IPC end-point, and those whose
                  * respawn_count is negative (already adopted in an earlier
                  * round, or liveness detection suppressed; see \note above)
                  */
 958             if ((pcmk_children[i].endpoint == NULL)
 959                 || (pcmk_children[i].respawn_count < 0)) {
 960                 continue;
 961             }
 962 
 963             rc = child_liveness(&pcmk_children[i]);
 964             if (rc == pcmk_rc_ipc_unresponsive) {
 965                 /* As a speculation, don't give up if there are more rounds to
 966                  * come for other reasons, but don't artificially wait just
 967                  * because of this, since we would preferably start ASAP.
 968                  */
 969                 continue;
 970             }
 971 
                 // Temporarily record the current round in respawn_count
 972             pcmk_children[i].respawn_count = rounds;
 973             switch (rc) {
 974                 case pcmk_rc_ok:
 975                     if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
 976                         if (crm_is_true(getenv("PCMK_fail_fast"))) {
 977                             crm_crit("Cannot reliably track pre-existing"
 978                                      " authentic process behind %s IPC on this"
 979                                      " platform and PCMK_fail_fast requested",
 980                                      pcmk_children[i].endpoint);
 981                             return EOPNOTSUPP;
 982                         } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
                                 /* Last round: accept the untrackable process.
                                  * NOTE(review): these two messages pass the
                                  * child's name for "%s IPC", whereas the
                                  * neighbouring messages pass its endpoint --
                                  * confirm which one is intended.
                                  */
 983                             crm_notice("Assuming pre-existing authentic, though"
 984                                        " on this platform untrackable, process"
 985                                        " behind %s IPC is stable (was in %d"
 986                                        " previous samples) so rather than"
 987                                        " bailing out (PCMK_fail_fast not"
 988                                        " requested), we just switch to a less"
 989                                        " optimal IPC liveness monitoring"
 990                                        " (not very suitable for heavy load)",
 991                                        pcmk_children[i].name, WAIT_TRIES - 1);
 992                             crm_warn("The process behind %s IPC cannot be"
 993                                      " terminated, so the overall shutdown"
 994                                      " will get delayed implicitly (%ld s),"
 995                                      " which serves as a graceful period for"
 996                                      " its native termination if it vitally"
 997                                      " depends on some other daemons going"
 998                                      " down in a controlled way already",
 999                                      pcmk_children[i].name,
1000                                      (long) SHUTDOWN_ESCALATION_PERIOD);
1001                         } else {
                                 // Not the last round yet: sample again later
1002                             wait_in_progress = true;
1003                             crm_warn("Cannot reliably track pre-existing"
1004                                      " authentic process behind %s IPC on this"
1005                                      " platform, can still disappear in %d"
1006                                      " attempt(s)", pcmk_children[i].endpoint,
1007                                      WAIT_TRIES - pcmk_children[i].respawn_count);
1008                             continue;
1009                         }
1010                     }
1011                     crm_notice("Tracking existing %s process (pid=%lld)",
1012                                pcmk_children[i].name,
1013                                (long long) PCMK__SPECIAL_PID_AS_0(
1014                                                pcmk_children[i].pid));
                         // A negative count makes later rounds skip this child
1015                     pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
1016                     pcmk_children[i].active_before_startup = TRUE;
1017                     tracking = true;
1018                     break;
1019                 case pcmk_rc_ipc_pid_only:
                         // Process exists but its IPC does not (yet): wait,
                         // unless this was already the last round
1020                     if (pcmk_children[i].respawn_count == WAIT_TRIES) {
1021                         crm_crit("%s IPC end-point for existing authentic"
1022                                  " process %lld did not (re)appear",
1023                                  pcmk_children[i].endpoint,
1024                                  (long long) PCMK__SPECIAL_PID_AS_0(
1025                                                  pcmk_children[i].pid));
1026                         return rc;
1027                     }
1028                     wait_in_progress = true;
1029                     crm_warn("Cannot find %s IPC end-point for existing"
1030                              " authentic process %lld, can still (re)appear"
1031                              " in %d attempts (?)",
1032                              pcmk_children[i].endpoint,
1033                              (long long) PCMK__SPECIAL_PID_AS_0(
1034                                              pcmk_children[i].pid),
1035                              WAIT_TRIES - pcmk_children[i].respawn_count);
1036                     continue;
1037                 default:
                         // Any other result is returned to the caller as-is
1038                     crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d",
1039                              pcmk_children[i].name, pcmk_rc_str(rc), rc);
1040                     return rc;
1041             }
1042         }
             // No child asked for another sample, so no need to wait further
1043         if (!wait_in_progress) {
1044             break;
1045         }
1046         pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen
1047     }
         // Success path only: undo the temporary use of respawn_count
1048     for (i = 0; i < SIZEOF(pcmk_children); i++) {
1049         pcmk_children[i].respawn_count = 0;  /* restore pristine state */
1050     }
1051 
         // Adopted processes get re-checked periodically by a timer
1052     if (tracking) {
1053         g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
1054                               check_active_before_startup_processes, NULL);
1055     }
1056     return pcmk_rc_ok;
1057 }
1058 
1059 static gboolean
1060 init_children_processes(void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1061 {
1062     int start_seq = 1, lpc = 0;
1063     static int max = SIZEOF(pcmk_children);
1064 
1065     /* start any children that have not been detected */
1066     for (start_seq = 1; start_seq < max; start_seq++) {
1067         /* don't start anything with start_seq < 1 */
1068         for (lpc = 0; lpc < max; lpc++) {
1069             if (pcmk_children[lpc].pid != 0) {
1070                 /* we are already tracking it */
1071                 continue;
1072             }
1073 
1074             if (start_seq == pcmk_children[lpc].start_seq) {
1075                 start_child(&(pcmk_children[lpc]));
1076             }
1077         }
1078     }
1079 
1080     /* From this point on, any daemons being started will be due to
1081      * respawning rather than node start.
1082      *
1083      * This may be useful for the daemons to know
1084      */
1085     setenv("PCMK_respawned", "true", 1);
1086     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
1087     return TRUE;
1088 }
1089 
/*!
 * \internal
 * \brief Raise the core file size limit as far as permitted
 *
 * The soft limit is set to the hard limit. If the hard limit is 0 and the
 * effective user is root, the hard limit is first lifted to RLIM_INFINITY;
 * otherwise the current hard limit is merely logged. Failures are logged as
 * warnings and otherwise ignored.
 */
static void
remove_core_file_limit(void)
{
    struct rlimit core_limit;

    if (getrlimit(RLIMIT_CORE, &core_limit) < 0) {
        crm_warn("Cannot determine current maximum core file size: %s",
                 strerror(errno));
        return;
    }

    if ((core_limit.rlim_max != 0) || (geteuid() != 0)) {
        crm_info("Maximum core file size is %llu bytes",
                 (unsigned long long) core_limit.rlim_max);
    } else {
        // Core files are fully disabled, but root may lift the hard limit
        core_limit.rlim_max = RLIM_INFINITY;
    }

    // Let the soft limit follow the (possibly just raised) hard limit
    core_limit.rlim_cur = core_limit.rlim_max;

    if (setrlimit(RLIMIT_CORE, &core_limit) < 0) {
        crm_warn("Cannot raise system limit on core file size "
                 "(consider doing so manually)");
    }
}
1116 
1117 static crm_exit_t
1118 request_shutdown(crm_ipc_t *ipc)
     /* [previous][next][first][last][top][bottom][index][help] */
1119 {
1120     xmlNode *request = NULL;
1121     xmlNode *reply = NULL;
1122     int rc = 0;
1123     crm_exit_t status = CRM_EX_OK;
1124 
1125     request = create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP,
1126                              CRM_SYSTEM_MCP, NULL);
1127     if (request == NULL) {
1128         crm_err("Unable to create shutdown request"); // Probably memory error
1129         status = CRM_EX_TEMPFAIL;
1130         goto done;
1131     }
1132 
1133     crm_notice("Requesting shutdown of existing Pacemaker instance");
1134     rc = crm_ipc_send(ipc, request, crm_ipc_client_response, 0, &reply);
1135     if (rc < 0) {
1136         crm_err("Could not send shutdown request");
1137         status = crm_errno2exit(rc);
1138         goto done;
1139     }
1140 
1141     if ((rc == 0) || (reply == NULL)) {
1142         crm_err("Unrecognized response to shutdown request");
1143         status = CRM_EX_PROTOCOL;
1144         goto done;
1145     }
1146 
1147     if ((crm_element_value_int(reply, "status", &rc) == 0)
1148         && (rc != CRM_EX_OK)) {
1149         crm_err("Shutdown request failed: %s", crm_exit_str(rc));
1150         status = rc;
1151         goto done;
1152     }
1153 
1154     // Wait for pacemakerd to shut down IPC (with 30-minute timeout)
1155     status = CRM_EX_TIMEOUT;
1156     for (int i = 0; i < 900; ++i) {
1157         if (!crm_ipc_connected(ipc)) {
1158             status = CRM_EX_OK;
1159             break;
1160         }
1161         sleep(2);
1162     }
1163 
1164 done:
1165     free_xml(request);
1166     crm_ipc_close(ipc);
1167     crm_ipc_destroy(ipc);
1168     return status;
1169 }
1170 
1171 int
1172 main(int argc, char **argv)
     /* [previous][next][first][last][top][bottom][index][help] */
1173 {
         /* Entry point of pacemakerd: parse options, check for an already
          * running instance (optionally just asking it to shut down), set up
          * logging, IPC and (if supported) the corosync cfg connection, adopt
          * pre-existing sub-daemons, start the remaining ones, and then run
          * the main loop until it ends.
          */
1174     int flag;
1175     int argerr = 0;
1176 
1177     int option_index = 0;
1178     bool old_instance_connected = false;
1179     gboolean shutdown = FALSE;
1180 
1181     crm_ipc_t *old_instance = NULL;
1182     qb_ipcs_service_t *ipcs = NULL;
1183 
1184     crm_log_preinit(NULL, argc, argv);
1185     pcmk__set_cli_options(NULL, "[options]", long_options,
1186                           "primary Pacemaker daemon that launches and "
1187                           "monitors all subsidiary Pacemaker daemons");
1188     mainloop_add_signal(SIGHUP, pcmk_ignore);
1189     mainloop_add_signal(SIGQUIT, pcmk_sigquit);
1190 
         // Consume options until pcmk__next_cli_option() reports -1
1191     while (1) {
1192         flag = pcmk__next_cli_option(argc, argv, &option_index, NULL);
1193         if (flag == -1)
1194             break;
1195 
1196         switch (flag) {
1197             case 'V':
1198                 crm_bump_log_level(argc, argv);
1199                 break;
1200             case 'f':
1201                 /* Legacy */
1202                 break;
1203             case 'p':
                     // Override the default PID file path
1204                 pid_file = optarg;
1205                 break;
1206             case 's':
1207                 pcmk__set_env_option("node_start_state", "standby");
1208                 break;
1209             case '$':
1210             case '?':
1211                 pcmk__cli_help(flag, CRM_EX_OK);
1212                 break;
1213             case 'S':
                     // Only remembered here; acted upon once logging is set up
1214                 shutdown = TRUE;
1215                 break;
1216             case 'F':
1217                 printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION,
1218                        CRM_FEATURE_SET, CRM_FEATURES);
1219                 crm_exit(CRM_EX_OK);
                     // NOTE(review): no break before default; presumably
                     // crm_exit() does not return -- confirm
1220             default:
1221                 printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag);
1222                 ++argerr;
1223                 break;
1224         }
1225     }
1226 
         // Leftover non-option arguments are only echoed, not rejected
1227     if (optind < argc) {
1228         printf("non-option ARGV-elements: ");
1229         while (optind < argc)
1230             printf("%s ", argv[optind++]);
1231         printf("\n");
1232     }
1233     if (argerr) {
1234         pcmk__cli_help('?', CRM_EX_USAGE);
1235     }
1236 
1237 
1238     setenv("LC_ALL", "C", 1);
1239 
1240     pcmk__set_env_option("mcp", "true");
1241 
1242     crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
1243 
         // Try to connect, as a client, to an instance that is already running
1244     crm_debug("Checking for existing Pacemaker instance");
1245     old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0);
1246     old_instance_connected = crm_ipc_connect(old_instance);
1247 
1248     if (shutdown) {
             // -S: either branch below exits the process
1249         if (old_instance_connected) {
                 // request_shutdown() also closes and destroys old_instance
1250             crm_exit(request_shutdown(old_instance));
1251         } else {
1252             crm_err("Could not request shutdown of existing "
1253                     "Pacemaker instance: %s", strerror(errno));
1254             crm_ipc_close(old_instance);
1255             crm_ipc_destroy(old_instance);
1256             crm_exit(CRM_EX_DISCONNECT);
1257         }
1258 
1259     } else if (old_instance_connected) {
             // Normal start, but another instance answered: refuse to start
1260         crm_ipc_close(old_instance);
1261         crm_ipc_destroy(old_instance);
1262         crm_err("Aborting start-up because active Pacemaker instance found");
1263         crm_exit(CRM_EX_FATAL);
1264     }
1265 
         // No instance is running; the probe connection is not needed anymore
1266     crm_ipc_close(old_instance);
1267     crm_ipc_destroy(old_instance);
1268 
1269 #ifdef SUPPORT_COROSYNC
1270     if (mcp_read_config() == FALSE) {
1271         crm_exit(CRM_EX_UNAVAILABLE);
1272     }
1273 #endif
1274 
1275     // OCF shell functions and cluster-glue need facility under different name
1276     {
1277         const char *facility = pcmk__env_option("logfacility");
1278 
1279         if (facility && !pcmk__str_eq(facility, "none", pcmk__str_casei)) {
1280             setenv("HA_LOGFACILITY", facility, 1);
1281         }
1282     }
1283 
1284     crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s",
1285                PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
1286     mainloop = g_main_loop_new(NULL, FALSE);
1287 
1288     remove_core_file_limit();
1289     create_pcmk_dirs();
1290     pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks);
1291 
1292 #ifdef SUPPORT_COROSYNC
1293     /* Allows us to block shutdown */
1294     if (!cluster_connect_cfg()) {
1295         crm_exit(CRM_EX_PROTOCOL);
1296     }
1297 #endif
1298 
         // Tell the sub-daemons, via the environment, whether SBD was located
1299     if (pcmk__locate_sbd() > 0) {
1300         setenv("PCMK_watchdog", "true", 1);
1301         running_with_sbd = TRUE;
1302     } else {
1303         setenv("PCMK_watchdog", "false", 1);
1304     }
1305 
         // Adopt sub-daemons that are already running; any failure is fatal
1306     switch (find_and_track_existing_processes()) {
1307         case pcmk_rc_ok:
1308             break;
1309         case pcmk_rc_ipc_unauthorized:
1310             crm_exit(CRM_EX_CANTCREAT);
1311         default:
1312             crm_exit(CRM_EX_FATAL);
1313     };
1314 
1315     mainloop_add_signal(SIGTERM, pcmk_shutdown);
1316     mainloop_add_signal(SIGINT, pcmk_shutdown);
1317 
         /* With SBD and synchronized resource startup, starting the children
          * is deferred to startup_trigger; otherwise they are started now.
          */
1318     if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
1319         crm_notice("Waiting for startup-trigger from SBD.");
1320         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
1321         startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
1322     } else {
1323         if (running_with_sbd) {
1324             crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
1325                      "by your SBD version) improve reliability of "
1326                      "interworking between SBD & pacemaker.");
1327         }
1328         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
1329         init_children_processes(NULL);
1330     }
1331 
1332     crm_notice("Pacemaker daemon successfully started and accepting connections");
1333     g_main_loop_run(mainloop);
1334 
         // The main loop has ended: release what was set up above, then exit
1335     if (ipcs) {
1336         crm_trace("Closing IPC server");
1337         mainloop_del_ipc_server(ipcs);
1338         ipcs = NULL;
1339     }
1340 
1341     g_main_loop_unref(mainloop);
1342 #ifdef SUPPORT_COROSYNC
1343     cluster_disconnect_cfg();
1344 #endif
1345     crm_exit(CRM_EX_OK);
1346 }

/* [previous][next][first][last][top][bottom][index][help] */