root/daemons/pacemakerd/pcmkd_corosync.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. cfg_shutdown_callback
  2. pcmk_cfg_dispatch
  3. close_cfg
  4. cluster_reconnect_cb
  5. cfg_connection_destroy
  6. cluster_disconnect_cfg
  7. cluster_connect_cfg
  8. pcmkd_shutdown_corosync
  9. pcmkd_corosync_connected
  10. get_config_opt
  11. mcp_read_config

   1 /*
   2  * Copyright 2010-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include "pacemakerd.h"
  12 
  13 #include <sys/utsname.h>
  14 #include <sys/stat.h>           /* for calls to stat() */
  15 #include <libgen.h>             /* For basename() and dirname() */
  16 
  17 #include <sys/types.h>
  18 #include <pwd.h>                /* For getpwnam() */
  19 
  20 #include <corosync/hdb.h>
  21 #include <corosync/cfg.h>
  22 #include <corosync/cpg.h>
  23 #include <corosync/cmap.h>
  24 
  25 #include <crm/cluster/internal.h>
  26 #include <crm/common/ipc.h>     /* for crm_ipc_is_authentic_process */
  27 #include <crm/common/mainloop.h>
  28 
  29 #include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
  30 
  31 static corosync_cfg_handle_t cfg_handle = 0;
  32 static mainloop_timer_t *reconnect_timer = NULL;
  33 
  34 /* =::=::=::= CFG - Shutdown stuff =::=::=::= */
  35 
  36 static void
  37 cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags)
     /* [previous][next][first][last][top][bottom][index][help] */
  38 {
  39     crm_info("Corosync wants to shut down: %s",
  40              (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" :
  41              (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional");
  42 
  43     /* Never allow corosync to shut down while we're running */
  44     corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO);
  45 }
  46 
  47 static corosync_cfg_callbacks_t cfg_callbacks = {
  48     .corosync_cfg_shutdown_callback = cfg_shutdown_callback,
  49 };
  50 
  51 static int
  52 pcmk_cfg_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54     corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data;
  55     cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL);
  56 
  57     if (rc != CS_OK) {
  58         return -1;
  59     }
  60     return 0;
  61 }
  62 
  63 static void
  64 close_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66     if (cfg_handle != 0) {
  67 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
  68         /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a
  69          * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully,
  70          * it's not necessary since we exit immediately after this.
  71          */
  72 #endif
  73         corosync_cfg_finalize(cfg_handle);
  74         cfg_handle = 0;
  75     }
  76 }
  77 
  78 static gboolean
  79 cluster_reconnect_cb(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
  80 {
  81     if (cluster_connect_cfg()) {
  82         mainloop_timer_del(reconnect_timer);
  83         reconnect_timer = NULL;
  84         crm_notice("Cluster reconnect succeeded");
  85         mcp_read_config();
  86         restart_cluster_subdaemons();
  87         return G_SOURCE_REMOVE;
  88     } else {
  89         crm_info("Cluster reconnect failed"
  90                  "(connection will be reattempted once per second)");
  91     }
  92     /*
  93      * In theory this will continue forever. In practice the CIB connection from
  94      * attrd will timeout and shut down Pacemaker when it gets bored.
  95      */
  96     return G_SOURCE_CONTINUE;
  97 }
  98 
  99 
 100 static void
 101 cfg_connection_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     crm_warn("Lost connection to cluster layer "
 104              "(connection will be reattempted once per second)");
 105     corosync_cfg_finalize(cfg_handle);
 106     cfg_handle = 0;
 107     reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL);
 108     mainloop_timer_start(reconnect_timer);
 109 }
 110 
 111 void
 112 cluster_disconnect_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114     close_cfg();
 115     if (reconnect_timer != NULL) {
 116         /* The mainloop should be gone by this point, so this isn't necessary,
 117          * but cleaning up memory should make valgrind happier.
 118          */
 119         mainloop_timer_del(reconnect_timer);
 120         reconnect_timer = NULL;
 121     }
 122 }
 123 
 /* Run a Corosync call, retrying while it reports a transient condition
  *
  * counter: integer lvalue counting retries so far (also the next delay, in
  *          seconds, so the back-off grows by one second per attempt)
  * max:     stop retrying once counter reaches this value
  * code:    statement to execute; it must store its result in a variable
  *          named "rc" that is in scope where the macro is used
  *
  * Only CS_ERR_TRY_AGAIN and CS_ERR_QUEUE_FULL are retried; any other result
  * ends the loop immediately, leaving the result in rc.
  */
 #define cs_repeat(counter, max, code) do {                                  \
         code;                                                               \
         if ((rc != CS_ERR_TRY_AGAIN) && (rc != CS_ERR_QUEUE_FULL)) {        \
             break;                                                          \
         }                                                                   \
         (counter)++;                                                        \
         crm_debug("Retrying Corosync operation after %ds", (counter));      \
         sleep(counter);                                                     \
     } while ((counter) < (max))
 134 
 135 gboolean
 136 cluster_connect_cfg(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 137 {
 138     cs_error_t rc;
 139     int fd = -1, retries = 0, rv;
 140     uid_t found_uid = 0;
 141     gid_t found_gid = 0;
 142     pid_t found_pid = 0;
 143     uint32_t nodeid;
 144 
 145     static struct mainloop_fd_callbacks cfg_fd_callbacks = {
 146         .dispatch = pcmk_cfg_dispatch,
 147         .destroy = cfg_connection_destroy,
 148     };
 149 
 150     cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
 151 
 152     if (rc != CS_OK) {
 153         crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d",
 154                  cs_strerror(rc), rc);
 155         return FALSE;
 156     }
 157 
 158     rc = corosync_cfg_fd_get(cfg_handle, &fd);
 159     if (rc != CS_OK) {
 160         crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d",
 161                  cs_strerror(rc), rc);
 162         goto bail;
 163     }
 164 
 165     /* CFG provider run as root (in given user namespace, anyway)? */
 166     if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
 167                                             &found_uid, &found_gid))) {
 168         crm_crit("Rejecting Corosync CFG provider because process %lld "
 169                  "is running as uid %lld gid %lld, not root",
 170                   (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
 171                  (long long) found_uid, (long long) found_gid);
 172         goto bail;
 173     } else if (rv < 0) {
 174         crm_crit("Could not authenticate Corosync CFG provider: %s "
 175                  CRM_XS " rc=%d", strerror(-rv), -rv);
 176         goto bail;
 177     }
 178 
 179     retries = 0;
 180     cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid));
 181     if (rc != CS_OK) {
 182         crm_crit("Could not get local node ID from Corosync: %s "
 183                  CRM_XS " rc=%d", cs_strerror(rc), rc);
 184         goto bail;
 185     }
 186     crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
 187 
 188 #ifdef HAVE_COROSYNC_CFG_TRACKSTART
 189     rc = corosync_cfg_trackstart(cfg_handle, 0);
 190     if (rc != CS_OK) {
 191         crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
 192                  cs_strerror(rc), rc);
 193         goto bail;
 194     }
 195 #endif
 196 
 197     mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks);
 198     return TRUE;
 199 
 200   bail:
 201     corosync_cfg_finalize(cfg_handle);
 202     return FALSE;
 203 }
 204 
 205 void
 206 pcmkd_shutdown_corosync(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 207 {
 208     cs_error_t rc;
 209 
 210     if (cfg_handle == 0) {
 211         crm_warn("Unable to shut down Corosync: No connection");
 212         return;
 213     }
 214     crm_info("Asking Corosync to shut down");
 215     rc = corosync_cfg_try_shutdown(cfg_handle,
 216                                     COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE);
 217     if (rc == CS_OK) {
 218         close_cfg();
 219     } else {
 220         crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d",
 221                  cs_strerror(rc), rc);
 222     }
 223 }
 224 
 225 bool
 226 pcmkd_corosync_connected(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 227 {
 228     cpg_handle_t local_handle = 0;
 229     cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
 230     int fd = -1;
 231 
 232     if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) {
 233         return false;
 234     }
 235 
 236     if (cpg_fd_get(local_handle, &fd) != CS_OK) {
 237         return false;
 238     }
 239 
 240     cpg_finalize(local_handle);
 241 
 242     return true;
 243 }
 244 
 245 /* =::=::=::= Configuration =::=::=::= */
 246 static int
 247 get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value,
     /* [previous][next][first][last][top][bottom][index][help] */
 248                const char *fallback)
 249 {
 250     int rc = 0, retries = 0;
 251 
 252     cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value));
 253     if (rc != CS_OK) {
 254         crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback);
 255         if (fallback) {
 256             *value = strdup(fallback);
 257         } else {
 258             *value = NULL;
 259         }
 260     }
 261     crm_trace("%s: %s", key, *value);
 262     return rc;
 263 }
 264 
 265 gboolean
 266 mcp_read_config(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 267 {
 268     cs_error_t rc = CS_OK;
 269     int retries = 0;
 270     cmap_handle_t local_handle;
 271     uint64_t config = 0;
 272     int fd = -1;
 273     uid_t found_uid = 0;
 274     gid_t found_gid = 0;
 275     pid_t found_pid = 0;
 276     int rv;
 277     enum cluster_type_e stack;
 278 
 279     // There can be only one possibility
 280     do {
 281         rc = pcmk__init_cmap(&local_handle);
 282         if (rc != CS_OK) {
 283             retries++;
 284             crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) "
 285                      CRM_XS " rc=%d", cs_strerror(rc), retries, rc);
 286             sleep(retries);
 287 
 288         } else {
 289             break;
 290         }
 291 
 292     } while (retries < 5);
 293 
 294     if (rc != CS_OK) {
 295         crm_crit("Could not connect to Corosync CMAP: %s "
 296                  CRM_XS " rc=%d", cs_strerror(rc), rc);
 297         return FALSE;
 298     }
 299 
 300     rc = cmap_fd_get(local_handle, &fd);
 301     if (rc != CS_OK) {
 302         crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d",
 303                  cs_strerror(rc), rc);
 304         cmap_finalize(local_handle);
 305         return FALSE;
 306     }
 307 
 308     /* CMAP provider run as root (in given user namespace, anyway)? */
 309     if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
 310                                             &found_uid, &found_gid))) {
 311         crm_crit("Rejecting Corosync CMAP provider because process %lld "
 312                  "is running as uid %lld gid %lld, not root",
 313                  (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
 314                  (long long) found_uid, (long long) found_gid);
 315         cmap_finalize(local_handle);
 316         return FALSE;
 317     } else if (rv < 0) {
 318         crm_crit("Could not authenticate Corosync CMAP provider: %s "
 319                  CRM_XS " rc=%d", strerror(-rv), -rv);
 320         cmap_finalize(local_handle);
 321         return FALSE;
 322     }
 323 
 324     stack = get_cluster_type();
 325     if (stack != pcmk_cluster_corosync) {
 326         crm_crit("Expected Corosync cluster layer but detected %s "
 327                  CRM_XS " stack=%d", name_for_cluster_type(stack), stack);
 328         return FALSE;
 329     }
 330 
 331     crm_info("Reading configuration for %s stack",
 332              name_for_cluster_type(stack));
 333     pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, "corosync");
 334     pcmk__set_env_option(PCMK__ENV_QUORUM_TYPE, "corosync");
 335 
 336     // If debug logging is not configured, check whether corosync has it
 337     if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) {
 338         char *debug_enabled = NULL;
 339 
 340         get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off");
 341 
 342         if (crm_is_true(debug_enabled)) {
 343             pcmk__set_env_option(PCMK__ENV_DEBUG, "1");
 344             if (get_crm_log_level() < LOG_DEBUG) {
 345                 set_crm_log_level(LOG_DEBUG);
 346             }
 347 
 348         } else {
 349             pcmk__set_env_option(PCMK__ENV_DEBUG, "0");
 350         }
 351 
 352         free(debug_enabled);
 353     }
 354 
 355     if(local_handle){
 356         gid_t gid = 0;
 357         if (pcmk_daemon_user(NULL, &gid) < 0) {
 358             crm_warn("Could not authorize group with Corosync " CRM_XS
 359                      " No group found for user %s", CRM_DAEMON_USER);
 360 
 361         } else {
 362             char key[PATH_MAX];
 363             snprintf(key, PATH_MAX, "uidgid.gid.%u", gid);
 364             rc = cmap_set_uint8(local_handle, key, 1);
 365             if (rc != CS_OK) {
 366                 crm_warn("Could not authorize group with Corosync: %s " CRM_XS
 367                          " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc);
 368             }
 369         }
 370     }
 371     cmap_finalize(local_handle);
 372 
 373     return TRUE;
 374 }

/* [previous][next][first][last][top][bottom][index][help] */