root/daemons/pacemakerd/pcmkd_corosync.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. cfg_shutdown_callback
  2. pcmk_cfg_dispatch
  3. close_cfg
  4. cluster_reconnect_cb
  5. cfg_connection_destroy
  6. cluster_disconnect_cfg
  7. cluster_connect_cfg
  8. pcmkd_shutdown_corosync
  9. get_config_opt
  10. mcp_read_config

   1 /*
   2  * Copyright 2010-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #include <sys/utsname.h>
  14 #include <sys/stat.h>           /* for calls to stat() */
  15 #include <libgen.h>             /* For basename() and dirname() */
  16 
  17 #include <sys/types.h>
  18 #include <pwd.h>                /* For getpwnam() */
  19 
  20 #include <corosync/hdb.h>
  21 #include <corosync/cfg.h>
  22 #include <corosync/cpg.h>
  23 #include <corosync/cmap.h>
  24 
  25 #include <crm/cluster/internal.h>
  26 #include <crm/common/ipc.h>     /* for crm_ipc_is_authentic_process */
  27 #include <crm/common/mainloop.h>
  28 
  29 #include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
  30 
  31 static corosync_cfg_handle_t cfg_handle = 0;
  32 static mainloop_timer_t *reconnect_timer = NULL;
  33 
  34 /* =::=::=::= CFG - Shutdown stuff =::=::=::= */
  35 
  36 static void
  37 cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
     /* [previous][next][first][last][top][bottom][index][help] */
  38 {
  39     crm_info("Corosync wants to shut down: %s",
  40              (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
  41              (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
  42 
  43     /* Never allow corosync to shut down while we're running */
  44     corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
  45 }
  46 
  47 static corosync_cfg_callbacks_t cfg_callbacks = {
  48     .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
  49 };
  50 
  51 static int
  52 pcmk_cfg_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54     corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
  55     cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
  56 
  57     if (rc != CS_OK) {
  58         return -1;
  59     }
  60     return 0;
  61 }
  62 
  63 static void
  64 close_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66     if (cfg_handle != 0) {
  67 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
  68         /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a
  69          * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully,
  70          * it's not necessary since we exit immediately after this.
  71          */
  72 #endif
  73         corosync_cfg_finalize(cfg_handle);
  74         cfg_handle = 0;
  75     }
  76 }
  77 
  78 static gboolean
  79 cluster_reconnect_cb(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
  80 {
  81     if (cluster_connect_cfg()) {
  82         mainloop_timer_del(reconnect_timer);
  83         reconnect_timer = NULL;
  84         crm_notice("Cluster reconnect succeeded");
  85     } else {
  86         crm_info("Cluster reconnect failed"
  87                  "(connection will be reattempted once per second)");
  88     }
  89     /*
  90      * In theory this will continue forever. In practice the CIB connection from
  91      * attrd will timeout and shut down Pacemaker when it gets bored.
  92      */
  93     return TRUE;
  94 }
  95 
  96 
  97 static void
  98 cfg_connection_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  99 {
 100     crm_warn("Lost connection to cluster layer "
 101              "(connection will be reattempted once per second)");
 102     corosync_cfg_finalize(cfg_handle);
 103     cfg_handle = 0;
 104     reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL);
 105     mainloop_timer_start(reconnect_timer);
 106 }
 107 
 108 void
 109 cluster_disconnect_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 110 {
 111     close_cfg();
 112     if (reconnect_timer != NULL) {
 113         /* The mainloop should be gone by this point, so this isn't necessary,
 114          * but cleaning up memory should make valgrind happier.
 115          */
 116         mainloop_timer_del(reconnect_timer);
 117         reconnect_timer = NULL;
 118     }
 119 }
 120 
 121 #define cs_repeat(counter, max, code) do {              \
 122         code;                                           \
 123         if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {  \
 124             counter++;                                  \
 125             crm_debug("Retrying Corosync operation after %ds", counter);    \
 126             sleep(counter);                             \
 127         } else {                                        \
 128             break;                                      \
 129         }                                               \
 130     } while(counter < max)
 131 
 132 gboolean
 133 cluster_connect_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 134 {
 135     cs_error_t rc;
 136     int fd = -1, retries = 0, rv;
 137     uid_t found_uid = 0;
 138     gid_t found_gid = 0;
 139     pid_t found_pid = 0;
 140     uint32_t nodeid;
 141 
 142     static struct mainloop_fd_callbacks cfg_fd_callbacks = {
 143         .dispatch = pcmk_cfg_dispatch,
 144         .destroy = cfg_connection_destroy,
 145     };
 146 
 147     cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
 148 
 149     if (rc != CS_OK) {
 150         crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d",
 151                  cs_strerror(rc), rc);
 152         return FALSE;
 153     }
 154 
 155     rc = corosync_cfg_fd_get(cfg_handle, &fd);
 156     if (rc != CS_OK) {
 157         crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d",
 158                  cs_strerror(rc), rc);
 159         goto bail;
 160     }
 161 
 162     /* CFG provider run as root (in given user namespace, anyway)? */
 163     if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
 164                                             &found_uid, &found_gid))) {
 165         crm_crit("Rejecting Corosync CFG provider because process %lld "
 166                  "is running as uid %lld gid %lld, not root",
 167                   (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
 168                  (long long) found_uid, (long long) found_gid);
 169         goto bail;
 170     } else if (rv < 0) {
 171         crm_crit("Could not authenticate Corosync CFG provider: %s "
 172                  CRM_XS " rc=%d", strerror(-rv), -rv);
 173         goto bail;
 174     }
 175 
 176     retries = 0;
 177     cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
 178     if (rc != CS_OK) {
 179         crm_crit("Could not get local node ID from Corosync: %s "
 180                  CRM_XS " rc=%d", cs_strerror(rc), rc);
 181         goto bail;
 182     }
 183     crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
 184 
 185 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
 186     rc = corosync_cfg_trackstart(cfg_handle, 0);
 187     if (rc != CS_OK) {
 188         crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
 189                  cs_strerror(rc), rc);
 190         goto bail;
 191     }
 192 #endif
 193 
 194     mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
 195     return TRUE;
 196 
 197   bail:
 198     corosync_cfg_finalize(cfg_handle);
 199     return FALSE;
 200 }
 201 
 202 void
 203 pcmkd_shutdown_corosync(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 204 {
 205     cs_error_t rc;
 206 
 207     if (cfg_handle == 0) {
 208         crm_warn("Unable to shut down Corosync: No connection");
 209         return;
 210     }
 211     crm_info("Asking Corosync to shut down");
 212     rc = corosync_cfg_try_shutdown(cfg_handle,
 213                                     COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
 214     if (rc == CS_OK) {
 215         close_cfg();
 216     } else {
 217         crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d",
 218                  cs_strerror(rc), rc);
 219     }
 220 }
 221 
 222 
 223 /* =::=::=::= Configuration =::=::=::= */
 224 static int
 225 get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
     /* [previous][next][first][last][top][bottom][index][help] */
 226                const char *fallback)
 227 {
 228     int rc = 0, retries = 0;
 229 
 230     cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
 231     if (rc != CS_OK) {
 232         crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
 233         if (fallback) {
 234             *value = strdup(fallback);
 235         } else {
 236             *value = NULL;
 237         }
 238     }
 239     crm_trace("%s: %s", key, *value);
 240     return rc;
 241 }
 242 
 243 gboolean
 244 mcp_read_config(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 245 {
 246     cs_error_t rc = CS_OK;
 247     int retries = 0;
 248     cmap_handle_t local_handle;
 249     uint64_t config = 0;
 250     int fd = -1;
 251     uid_t found_uid = 0;
 252     gid_t found_gid = 0;
 253     pid_t found_pid = 0;
 254     int rv;
 255     enum cluster_type_e stack;
 256 
 257     // There can be only one possibility
 258     do {
 259         rc = pcmk__init_cmap(&local_handle);
 260         if (rc != CS_OK) {
 261             retries++;
 262             crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) "
 263                      CRM_XS " rc=%d", cs_strerror(rc), retries, rc);
 264             sleep(retries);
 265 
 266         } else {
 267             break;
 268         }
 269 
 270     } while (retries < 5);
 271 
 272     if (rc != CS_OK) {
 273         crm_crit("Could not connect to Corosync CMAP: %s "
 274                  CRM_XS " rc=%d", cs_strerror(rc), rc);
 275         return FALSE;
 276     }
 277 
 278     rc = cmap_fd_get(local_handle, &fd);
 279     if (rc != CS_OK) {
 280         crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d",
 281                  cs_strerror(rc), rc);
 282         cmap_finalize(local_handle);
 283         return FALSE;
 284     }
 285 
 286     /* CMAP provider run as root (in given user namespace, anyway)? */
 287     if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
 288                                             &found_uid, &found_gid))) {
 289         crm_crit("Rejecting Corosync CMAP provider because process %lld "
 290                  "is running as uid %lld gid %lld, not root",
 291                  (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
 292                  (long long) found_uid, (long long) found_gid);
 293         cmap_finalize(local_handle);
 294         return FALSE;
 295     } else if (rv < 0) {
 296         crm_crit("Could not authenticate Corosync CMAP provider: %s "
 297                  CRM_XS " rc=%d", strerror(-rv), -rv);
 298         cmap_finalize(local_handle);
 299         return FALSE;
 300     }
 301 
 302     stack = get_cluster_type();
 303     if (stack != pcmk_cluster_corosync) {
 304         crm_crit("Expected corosync stack but detected %s " CRM_XS " stack=%d",
 305                  name_for_cluster_type(stack), stack);
 306         return FALSE;
 307     }
 308 
 309     crm_info("Reading configuration for %s stack",
 310              name_for_cluster_type(stack));
 311     pcmk__set_env_option("cluster_type", "corosync");
 312     pcmk__set_env_option("quorum_type", "corosync");
 313 
 314     // If debug logging is not configured, check whether corosync has it
 315     if (pcmk__env_option("debug") == NULL) {
 316         char *debug_enabled = NULL;
 317 
 318         get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off");
 319 
 320         if (crm_is_true(debug_enabled)) {
 321             pcmk__set_env_option("debug", "1");
 322             if (get_crm_log_level() < LOG_DEBUG) {
 323                 set_crm_log_level(LOG_DEBUG);
 324             }
 325 
 326         } else {
 327             pcmk__set_env_option("debug", "0");
 328         }
 329 
 330         free(debug_enabled);
 331     }
 332 
 333     if(local_handle){
 334         gid_t gid = 0;
 335         if (pcmk_daemon_user(NULL, &gid) < 0) {
 336             crm_warn("Could not authorize group with Corosync " CRM_XS
 337                      " No group found for user %s", CRM_DAEMON_USER);
 338 
 339         } else {
 340             char key[PATH_MAX];
 341             snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
 342             rc = cmap_set_uint8(local_handle, key, 1);
 343             if (rc != CS_OK) {
 344                 crm_warn("Could not authorize group with Corosync: %s " CRM_XS
 345                          " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc);
 346             }
 347         }
 348     }
 349     cmap_finalize(local_handle);
 350 
 351     return TRUE;
 352 }

/* [previous][next][first][last][top][bottom][index][help] */