root/daemons/pacemakerd/pcmkd_corosync.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. cfg_shutdown_callback
  2. pcmk_cfg_dispatch
  3. close_cfg
  4. cluster_reconnect_cb
  5. cfg_connection_destroy
  6. cluster_disconnect_cfg
  7. cluster_connect_cfg
  8. pcmkd_shutdown_corosync
  9. pcmkd_corosync_connected
  10. get_config_opt
  11. mcp_read_config

   1 /*
   2  * Copyright 2010-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #include <sys/utsname.h>
  14 #include <sys/stat.h>           /* for calls to stat() */
  15 #include <libgen.h>             /* For basename() and dirname() */
  16 
  17 #include <sys/types.h>
  18 #include <pwd.h>                /* For getpwname() */
  19 
  20 #include <corosync/hdb.h>
  21 #include <corosync/cfg.h>
  22 #include <corosync/cpg.h>
  23 #include <corosync/cmap.h>
  24 
  25 #include <crm/cluster/internal.h>
  26 #include <crm/common/ipc.h>     /* for crm_ipc_is_authentic_process */
  27 #include <crm/common/mainloop.h>
  28 
  29 #include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
  30 
   /* Handle for the Corosync CFG connection; 0 means no connection is open */
   31 static corosync_cfg_handle_t cfg_handle = 0;
   /* Timer that retries the CFG connection after it is lost; NULL when unset */
   32 static mainloop_timer_t *reconnect_timer = NULL;
  33 
  34 /* =::=::=::= CFG - Shutdown stuff =::=::=::= */
  35 
  36 static void
  37 cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
     /* [previous][next][first][last][top][bottom][index][help] */
  38 {
  39     crm_info("Corosync wants to shut down: %s",
  40              (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
  41              (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
  42 
  43     /* Never allow corosync to shut down while we're running */
  44     corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
  45 }
  46 
   /* Callback table handed to corosync_cfg_initialize() by cluster_connect_cfg() */
   47 static corosync_cfg_callbacks_t cfg_callbacks = {
   48     .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
   49 };
  50 
  51 static int
  52 pcmk_cfg_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54     corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
  55     cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
  56 
  57     if (rc != CS_OK) {
  58         return -1;
  59     }
  60     return 0;
  61 }
  62 
  63 static void
  64 close_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66     if (cfg_handle != 0) {
  67 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
  68         /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a
  69          * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully,
  70          * it's not necessary since we exit immediately after this.
  71          */
  72 #endif
  73         corosync_cfg_finalize(cfg_handle);
  74         cfg_handle = 0;
  75     }
  76 }
  77 
  78 static gboolean
  79 cluster_reconnect_cb(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
  80 {
  81     if (cluster_connect_cfg()) {
  82         mainloop_timer_del(reconnect_timer);
  83         reconnect_timer = NULL;
  84         crm_notice("Cluster reconnect succeeded");
  85         mcp_read_config();
  86         restart_cluster_subdaemons();
  87         return G_SOURCE_REMOVE;
  88     } else {
  89         crm_info("Cluster reconnect failed "
  90                  "(connection will be reattempted once per second)");
  91     }
  92     /*
  93      * In theory this will continue forever. In practice the CIB connection from
  94      * attrd will timeout and shut down Pacemaker when it gets bored.
  95      */
  96     return G_SOURCE_CONTINUE;
  97 }
  98 
  99 
 100 static void
 101 cfg_connection_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     crm_warn("Lost connection to cluster layer "
 104              "(connection will be reattempted once per second)");
 105     corosync_cfg_finalize(cfg_handle);
 106     cfg_handle = 0;
 107     reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL);
 108     mainloop_timer_start(reconnect_timer);
 109 }
 110 
 111 void
 112 cluster_disconnect_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114     close_cfg();
 115     if (reconnect_timer != NULL) {
 116         /* The mainloop should be gone by this point, so this isn't necessary,
 117          * but cleaning up memory should make valgrind happier.
 118          */
 119         mainloop_timer_del(reconnect_timer);
 120         reconnect_timer = NULL;
 121     }
 122 }
 123 
 124 #define cs_repeat(counter, max, code) do {              \
 125         code;                                           \
 126         if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {  \
 127             counter++;                                  \
 128             crm_debug("Retrying Corosync operation after %ds", counter);    \
 129             sleep(counter);                             \
 130         } else {                                        \
 131             break;                                      \
 132         }                                               \
 133     } while(counter < max)
 134 
 135 gboolean
 136 cluster_connect_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 137 {
 138     cs_error_t rc;
 139     int fd = -1, retries = 0, rv;
 140     uid_t found_uid = 0;
 141     gid_t found_gid = 0;
 142     pid_t found_pid = 0;
 143     uint32_t nodeid;
 144 
 145     static struct mainloop_fd_callbacks cfg_fd_callbacks = {
 146         .dispatch = pcmk_cfg_dispatch,
 147         .destroy = cfg_connection_destroy,
 148     };
 149 
 150     cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
 151 
 152     if (rc != CS_OK) {
 153         crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d",
 154                  cs_strerror(rc), rc);
 155         return FALSE;
 156     }
 157 
 158     rc = corosync_cfg_fd_get(cfg_handle, &fd);
 159     if (rc != CS_OK) {
 160         crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d",
 161                  cs_strerror(rc), rc);
 162         goto bail;
 163     }
 164 
 165     /* CFG provider run as root (in given user namespace, anyway)? */
 166     if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
 167                                             &found_uid, &found_gid))) {
 168         crm_crit("Rejecting Corosync CFG provider because process %lld "
 169                  "is running as uid %lld gid %lld, not root",
 170                   (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
 171                  (long long) found_uid, (long long) found_gid);
 172         goto bail;
 173     } else if (rv < 0) {
 174         crm_crit("Could not authenticate Corosync CFG provider: %s "
 175                  CRM_XS " rc=%d", strerror(-rv), -rv);
 176         goto bail;
 177     }
 178 
 179     retries = 0;
 180     cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
 181     if (rc != CS_OK) {
 182         crm_crit("Could not get local node ID from Corosync: %s "
 183                  CRM_XS " rc=%d", cs_strerror(rc), rc);
 184         goto bail;
 185     }
 186     crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
 187 
 188 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
 189     retries = 0;
 190     cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
 191     if (rc != CS_OK) {
 192         crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
 193                  cs_strerror(rc), rc);
 194         goto bail;
 195     }
 196 #endif
 197 
 198     mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
 199     return TRUE;
 200 
 201   bail:
 202     corosync_cfg_finalize(cfg_handle);
 203     return FALSE;
 204 }
 205 
 206 void
 207 pcmkd_shutdown_corosync(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209     cs_error_t rc;
 210 
 211     if (cfg_handle == 0) {
 212         crm_warn("Unable to shut down Corosync: No connection");
 213         return;
 214     }
 215     crm_info("Asking Corosync to shut down");
 216     rc = corosync_cfg_try_shutdown(cfg_handle,
 217                                     COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
 218     if (rc == CS_OK) {
 219         close_cfg();
 220     } else {
 221         crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d",
 222                  cs_strerror(rc), rc);
 223     }
 224 }
 225 
 226 bool
 227 pcmkd_corosync_connected(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 228 {
 229     cpg_handle_t local_handle = 0;
 230     cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
 231     int fd = -1;
 232 
 233     if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) {
 234         return false;
 235     }
 236 
 237     if (cpg_fd_get(local_handle, &fd) != CS_OK) {
 238         return false;
 239     }
 240 
 241     cpg_finalize(local_handle);
 242 
 243     return true;
 244 }
 245 
 246 /* =::=::=::= Configuration =::=::=::= */
 247 static int
 248 get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
     /* [previous][next][first][last][top][bottom][index][help] */
 249                const char *fallback)
 250 {
 251     int rc = 0, retries = 0;
 252 
 253     cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
 254     if (rc != CS_OK) {
 255         crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
 256         pcmk__str_update(value, fallback);
 257     }
 258     crm_trace("%s: %s", key, *value);
 259     return rc;
 260 }
 261 
 262 gboolean
 263 mcp_read_config(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 264 {
 265     cs_error_t rc = CS_OK;
 266     int retries = 0;
 267     cmap_handle_t local_handle;
 268     uint64_t config = 0;
 269     int fd = -1;
 270     uid_t found_uid = 0;
 271     gid_t found_gid = 0;
 272     pid_t found_pid = 0;
 273     int rv;
 274     enum cluster_type_e stack;
 275 
 276     // There can be only one possibility
 277     do {
 278         rc = pcmk__init_cmap(&local_handle);
 279         if (rc != CS_OK) {
 280             retries++;
 281             crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) "
 282                      CRM_XS " rc=%d", cs_strerror(rc), retries, rc);
 283             sleep(retries);
 284 
 285         } else {
 286             break;
 287         }
 288 
 289     } while (retries < 5);
 290 
 291     if (rc != CS_OK) {
 292         crm_crit("Could not connect to Corosync CMAP: %s "
 293                  CRM_XS " rc=%d", cs_strerror(rc), rc);
 294         return FALSE;
 295     }
 296 
 297     rc = cmap_fd_get(local_handle, &fd);
 298     if (rc != CS_OK) {
 299         crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d",
 300                  cs_strerror(rc), rc);
 301         cmap_finalize(local_handle);
 302         return FALSE;
 303     }
 304 
 305     /* CMAP provider run as root (in given user namespace, anyway)? */
 306     if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
 307                                             &found_uid, &found_gid))) {
 308         crm_crit("Rejecting Corosync CMAP provider because process %lld "
 309                  "is running as uid %lld gid %lld, not root",
 310                  (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
 311                  (long long) found_uid, (long long) found_gid);
 312         cmap_finalize(local_handle);
 313         return FALSE;
 314     } else if (rv < 0) {
 315         crm_crit("Could not authenticate Corosync CMAP provider: %s "
 316                  CRM_XS " rc=%d", strerror(-rv), -rv);
 317         cmap_finalize(local_handle);
 318         return FALSE;
 319     }
 320 
 321     stack = get_cluster_type();
 322     if (stack != pcmk_cluster_corosync) {
 323         crm_crit("Expected Corosync cluster layer but detected %s "
 324                  CRM_XS " stack=%d", name_for_cluster_type(stack), stack);
 325         return FALSE;
 326     }
 327 
 328     crm_info("Reading configuration for %s stack",
 329              name_for_cluster_type(stack));
 330     pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync");
 331     pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync");
 332 
 333     // If debug logging is not configured, check whether corosync has it
 334     if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) {
 335         char *debug_enabled = NULL;
 336 
 337         get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off");
 338 
 339         if (crm_is_true(debug_enabled)) {
 340             pcmk__set_env_option(PCMK__ENV_DEBUG, "1");
 341             if (get_crm_log_level() < LOG_DEBUG) {
 342                 set_crm_log_level(LOG_DEBUG);
 343             }
 344 
 345         } else {
 346             pcmk__set_env_option(PCMK__ENV_DEBUG, "0");
 347         }
 348 
 349         free(debug_enabled);
 350     }
 351 
 352     if(local_handle){
 353         gid_t gid = 0;
 354         if (pcmk_daemon_user(NULL, &gid) < 0) {
 355             crm_warn("Could not authorize group with Corosync " CRM_XS
 356                      " No group found for user %s", CRM_DAEMON_USER);
 357 
 358         } else {
 359             char key[PATH_MAX];
 360             snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
 361             rc = cmap_set_uint8(local_handle, key, 1);
 362             if (rc != CS_OK) {
 363                 crm_warn("Could not authorize group with Corosync: %s " CRM_XS
 364                          " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc);
 365             }
 366         }
 367     }
 368     cmap_finalize(local_handle);
 369 
 370     return TRUE;
 371 }

/* [previous][next][first][last][top][bottom][index][help] */