root/daemons/controld/controld_control.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. do_ha_control
  2. do_shutdown
  3. do_shutdown_req
  4. crmd_fast_exit
  5. crmd_exit
  6. do_exit
  7. sigpipe_ignore
  8. do_startup
  9. accept_controller_client
  10. dispatch_controller_ipc
  11. crmd_ipc_closed
  12. crmd_ipc_destroy
  13. do_stop
  14. do_started
  15. do_recover
  16. crmd_metadata
  17. verify_crmd_options
  18. crmd_pref
  19. config_query_callback
  20. crm_read_options
  21. do_read_config
  22. crm_shutdown

   1 /*
   2  * Copyright 2004-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <sys/types.h>
  14 #include <sys/stat.h>
  15 
  16 #include <crm/crm.h>
  17 #include <crm/msg_xml.h>
  18 #include <crm/pengine/rules.h>
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include <crm/common/ipc_internal.h>
  22 
  23 #include <pacemaker-controld.h>
  24 
  25 qb_ipcs_service_t *ipcs = NULL;
  26 
  27 #if SUPPORT_COROSYNC
  28 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
  29 #endif
  30 
  31 void crm_shutdown(int nsig);
  32 gboolean crm_read_options(gpointer user_data);
  33 
  34 gboolean fsa_has_quorum = FALSE;
  35 crm_trigger_t *fsa_source = NULL;
  36 crm_trigger_t *config_read = NULL;
  37 bool no_quorum_suicide_escalation = FALSE;
  38 bool controld_shutdown_lock_enabled = false;
  39 
  40 /*       A_HA_CONNECT   */
  41 void
  42 do_ha_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  43               enum crmd_fsa_cause cause,
  44               enum crmd_fsa_state cur_state,
  45               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  46 {
  47     gboolean registered = FALSE;
  48     static crm_cluster_t *cluster = NULL;
  49 
  50     if (cluster == NULL) {
  51         cluster = calloc(1, sizeof(crm_cluster_t));
  52     }
  53 
  54     if (action & A_HA_DISCONNECT) {
  55         crm_cluster_disconnect(cluster);
  56         crm_info("Disconnected from the cluster");
  57 
  58         controld_set_fsa_input_flags(R_HA_DISCONNECTED);
  59     }
  60 
  61     if (action & A_HA_CONNECT) {
  62         crm_set_status_callback(&peer_update_callback);
  63         crm_set_autoreap(FALSE);
  64 
  65         if (is_corosync_cluster()) {
  66 #if SUPPORT_COROSYNC
  67             registered = crm_connect_corosync(cluster);
  68 #endif
  69         }
  70 
  71         if (registered == TRUE) {
  72             controld_election_init(cluster->uname);
  73             fsa_our_uname = cluster->uname;
  74             fsa_our_uuid = cluster->uuid;
  75             if(cluster->uuid == NULL) {
  76                 crm_err("Could not obtain local uuid");
  77                 registered = FALSE;
  78             }
  79         }
  80 
  81         if (registered == FALSE) {
  82             controld_set_fsa_input_flags(R_HA_DISCONNECTED);
  83             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
  84             return;
  85         }
  86 
  87         populate_cib_nodes(node_update_none, __func__);
  88         controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
  89         crm_info("Connected to the cluster");
  90     }
  91 
  92     if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
  93         crm_err("Unexpected action %s in %s", fsa_action2string(action),
  94                 __func__);
  95     }
  96 }
  97 
  98 /*       A_SHUTDOWN     */
  99 void
 100 do_shutdown(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 101             enum crmd_fsa_cause cause,
 102             enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 103 {
 104     /* just in case */
 105     controld_set_fsa_input_flags(R_SHUTDOWN);
 106     controld_disconnect_fencer(FALSE);
 107 }
 108 
 109 /*       A_SHUTDOWN_REQ */
 110 void
 111 do_shutdown_req(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 112                 enum crmd_fsa_cause cause,
 113                 enum crmd_fsa_state cur_state,
 114                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 115 {
 116     xmlNode *msg = NULL;
 117 
 118     controld_set_fsa_input_flags(R_SHUTDOWN);
 119     //controld_set_fsa_input_flags(R_STAYDOWN);
 120     crm_info("Sending shutdown request to all peers (DC is %s)",
 121              (fsa_our_dc? fsa_our_dc : "not set"));
 122     msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 123 
 124     if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
 125         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 126     }
 127     free_xml(msg);
 128 }
 129 
 130 extern char *max_generation_from;
 131 extern xmlNode *max_generation_xml;
 132 extern GHashTable *resource_history;
 133 extern GHashTable *voted;
 134 
 135 void
 136 crmd_fast_exit(crm_exit_t exit_code)
     /* [previous][next][first][last][top][bottom][index][help] */
 137 {
 138     if (pcmk_is_set(fsa_input_register, R_STAYDOWN)) {
 139         crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
 140                  exit_code, CRM_EX_FATAL);
 141         exit_code = CRM_EX_FATAL;
 142 
 143     } else if ((exit_code == CRM_EX_OK)
 144                && pcmk_is_set(fsa_input_register, R_IN_RECOVERY)) {
 145         crm_err("Could not recover from internal error");
 146         exit_code = CRM_EX_ERROR;
 147     }
 148     crm_exit(exit_code);
 149 }
 150 
 151 crm_exit_t
 152 crmd_exit(crm_exit_t exit_code)
     /* [previous][next][first][last][top][bottom][index][help] */
 153 {
 154     GList *gIter = NULL;
 155     GMainLoop *mloop = crmd_mainloop;
 156 
 157     static bool in_progress = FALSE;
 158 
 159     if (in_progress && (exit_code == CRM_EX_OK)) {
 160         crm_debug("Exit is already in progress");
 161         return exit_code;
 162 
 163     } else if(in_progress) {
 164         crm_notice("Error during shutdown process, exiting now with status %d (%s)",
 165                    exit_code, crm_exit_str(exit_code));
 166         crm_write_blackbox(SIGTRAP, NULL);
 167         crmd_fast_exit(exit_code);
 168     }
 169 
 170     in_progress = TRUE;
 171     crm_trace("Preparing to exit with status %d (%s)",
 172               exit_code, crm_exit_str(exit_code));
 173 
 174     /* Suppress secondary errors resulting from us disconnecting everything */
 175     controld_set_fsa_input_flags(R_HA_DISCONNECTED);
 176 
 177 /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
 178 
 179     if(ipcs) {
 180         crm_trace("Closing IPC server");
 181         mainloop_del_ipc_server(ipcs);
 182         ipcs = NULL;
 183     }
 184 
 185     controld_close_attrd_ipc();
 186     pe_subsystem_free();
 187     controld_disconnect_fencer(TRUE);
 188 
 189     if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
 190         crm_debug("No mainloop detected");
 191         exit_code = CRM_EX_ERROR;
 192     }
 193 
 194     /* On an error, just get out.
 195      *
 196      * Otherwise, make the effort to have mainloop exit gracefully so
 197      * that it (mostly) cleans up after itself and valgrind has less
 198      * to report on - allowing real errors stand out
 199      */
 200     if (exit_code != CRM_EX_OK) {
 201         crm_notice("Forcing immediate exit with status %d (%s)",
 202                    exit_code, crm_exit_str(exit_code));
 203         crm_write_blackbox(SIGTRAP, NULL);
 204         crmd_fast_exit(exit_code);
 205     }
 206 
 207 /* Clean up as much memory as possible for valgrind */
 208 
 209     for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
 210         fsa_data_t *fsa_data = gIter->data;
 211 
 212         crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
 213                  fsa_input2string(fsa_data->fsa_input),
 214                  fsa_state2string(fsa_state),
 215                  fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
 216         delete_fsa_input(fsa_data);
 217     }
 218 
 219     controld_clear_fsa_input_flags(R_MEMBERSHIP);
 220     g_list_free(fsa_message_queue); fsa_message_queue = NULL;
 221 
 222     metadata_cache_fini();
 223     controld_election_fini();
 224 
 225     /* Tear down the CIB manager connection, but don't free it yet -- it could
 226      * be used when we drain the mainloop later.
 227      */
 228     fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced);
 229     fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated);
 230     cib_free_callbacks(fsa_cib_conn);
 231     fsa_cib_conn->cmds->signoff(fsa_cib_conn);
 232 
 233     verify_stopped(fsa_state, LOG_WARNING);
 234     controld_clear_fsa_input_flags(R_LRM_CONNECTED);
 235     lrm_state_destroy_all();
 236 
 237     /* This basically will not work, since mainloop has a reference to it */
 238     mainloop_destroy_trigger(fsa_source); fsa_source = NULL;
 239 
 240     mainloop_destroy_trigger(config_read); config_read = NULL;
 241     mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;
 242 
 243     pcmk__client_cleanup();
 244     crm_peer_destroy();
 245 
 246     controld_free_fsa_timers();
 247     te_cleanup_stonith_history_sync(NULL, TRUE);
 248     controld_free_sched_timer();
 249 
 250     free(fsa_our_dc_version); fsa_our_dc_version = NULL;
 251     free(fsa_our_uname); fsa_our_uname = NULL;
 252     free(fsa_our_uuid); fsa_our_uuid = NULL;
 253     free(fsa_our_dc); fsa_our_dc = NULL;
 254 
 255     free(fsa_cluster_name); fsa_cluster_name = NULL;
 256 
 257     free(te_uuid); te_uuid = NULL;
 258     free(failed_stop_offset); failed_stop_offset = NULL;
 259     free(failed_start_offset); failed_start_offset = NULL;
 260 
 261     free(max_generation_from); max_generation_from = NULL;
 262     free_xml(max_generation_xml); max_generation_xml = NULL;
 263 
 264     mainloop_destroy_signal(SIGPIPE);
 265     mainloop_destroy_signal(SIGUSR1);
 266     mainloop_destroy_signal(SIGTERM);
 267     mainloop_destroy_signal(SIGTRAP);
 268     /* leave SIGCHLD engaged as we might still want to drain some service-actions */
 269 
 270     if (mloop) {
 271         GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);
 272 
 273         /* Don't re-enter this block */
 274         crmd_mainloop = NULL;
 275 
 276         /* no signals on final draining anymore */
 277         mainloop_destroy_signal(SIGCHLD);
 278 
 279         crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
 280 
 281         {
 282             int lpc = 0;
 283 
 284             while((g_main_context_pending(ctx) && lpc < 10)) {
 285                 lpc++;
 286                 crm_trace("Iteration %d", lpc);
 287                 g_main_context_dispatch(ctx);
 288             }
 289         }
 290 
 291         crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
 292         g_main_loop_quit(mloop);
 293 
 294         /* Won't do anything yet, since we're inside it now */
 295         g_main_loop_unref(mloop);
 296     } else {
 297         mainloop_destroy_signal(SIGCHLD);
 298     }
 299 
 300     cib_delete(fsa_cib_conn);
 301     fsa_cib_conn = NULL;
 302 
 303     throttle_fini();
 304 
 305     /* Graceful */
 306     crm_trace("Done preparing for exit with status %d (%s)",
 307               exit_code, crm_exit_str(exit_code));
 308     return exit_code;
 309 }
 310 
 311 /*       A_EXIT_0, A_EXIT_1     */
 312 void
 313 do_exit(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 314         enum crmd_fsa_cause cause,
 315         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 316 {
 317     crm_exit_t exit_code = CRM_EX_OK;
 318     int log_level = LOG_INFO;
 319     const char *exit_type = "gracefully";
 320 
 321     if (action & A_EXIT_1) {
 322         log_level = LOG_ERR;
 323         exit_type = "forcefully";
 324         exit_code = CRM_EX_ERROR;
 325     }
 326 
 327     verify_stopped(cur_state, LOG_ERR);
 328     do_crm_log(log_level, "Performing %s - %s exiting the controller",
 329                fsa_action2string(action), exit_type);
 330 
 331     crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
 332     crmd_exit(exit_code);
 333 }
 334 
 335 static void sigpipe_ignore(int nsig) { return; }
     /* [previous][next][first][last][top][bottom][index][help] */
 336 
 337 /*       A_STARTUP      */
 338 void
 339 do_startup(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 340            enum crmd_fsa_cause cause,
 341            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 342 {
 343     crm_debug("Registering Signal Handlers");
 344     mainloop_add_signal(SIGTERM, crm_shutdown);
 345     mainloop_add_signal(SIGPIPE, sigpipe_ignore);
 346 
 347     fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
 348     config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
 349     transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
 350 
 351     crm_debug("Creating CIB manager and executor objects");
 352     fsa_cib_conn = cib_new();
 353 
 354     lrm_state_init_local();
 355     if (controld_init_fsa_timers() == FALSE) {
 356         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 357     }
 358 }
 359 
 360 // \return libqb error code (0 on success, -errno on error)
 361 static int32_t
 362 accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
     /* [previous][next][first][last][top][bottom][index][help] */
 363 {
 364     crm_trace("Accepting new IPC client connection");
 365     if (pcmk__new_client(c, uid, gid) == NULL) {
 366         return -EIO;
 367     }
 368     return 0;
 369 }
 370 
 371 // \return libqb error code (0 on success, -errno on error)
 372 static int32_t
 373 dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
     /* [previous][next][first][last][top][bottom][index][help] */
 374 {
 375     uint32_t id = 0;
 376     uint32_t flags = 0;
 377     pcmk__client_t *client = pcmk__find_client(c);
 378 
 379     xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
 380 
 381     if (msg == NULL) {
 382         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_PROTOCOL);
 383         return 0;
 384     }
 385     pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_INDETERMINATE);
 386 
 387     CRM_ASSERT(client->user != NULL);
 388     pcmk__update_acl_user(msg, F_CRM_USER, client->user);
 389 
 390     crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
 391     if (controld_authorize_ipc_message(msg, client, NULL)) {
 392         crm_trace("Processing IPC message from client %s",
 393                   pcmk__client_name(client));
 394         route_message(C_IPC_MESSAGE, msg);
 395     }
 396 
 397     trigger_fsa();
 398     free_xml(msg);
 399     return 0;
 400 }
 401 
 402 static int32_t
 403 crmd_ipc_closed(qb_ipcs_connection_t * c)
     /* [previous][next][first][last][top][bottom][index][help] */
 404 {
 405     pcmk__client_t *client = pcmk__find_client(c);
 406 
 407     if (client) {
 408         crm_trace("Disconnecting %sregistered client %s (%p/%p)",
 409                   (client->userdata? "" : "un"), pcmk__client_name(client),
 410                   c, client);
 411         free(client->userdata);
 412         pcmk__free_client(client);
 413         trigger_fsa();
 414     }
 415     return 0;
 416 }
 417 
 418 static void
 419 crmd_ipc_destroy(qb_ipcs_connection_t * c)
     /* [previous][next][first][last][top][bottom][index][help] */
 420 {
 421     crm_trace("Connection %p", c);
 422     crmd_ipc_closed(c);
 423 }
 424 
 425 /*       A_STOP */
 426 void
 427 do_stop(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 428         enum crmd_fsa_cause cause,
 429         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 430 {
 431     crm_trace("Closing IPC server");
 432     mainloop_del_ipc_server(ipcs); ipcs = NULL;
 433     register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
 434 }
 435 
 436 /*       A_STARTED      */
 437 void
 438 do_started(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 439            enum crmd_fsa_cause cause,
 440            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 441 {
 442     static struct qb_ipcs_service_handlers crmd_callbacks = {
 443         .connection_accept = accept_controller_client,
 444         .connection_created = NULL,
 445         .msg_process = dispatch_controller_ipc,
 446         .connection_closed = crmd_ipc_closed,
 447         .connection_destroyed = crmd_ipc_destroy
 448     };
 449 
 450     if (cur_state != S_STARTING) {
 451         crm_err("Start cancelled... %s", fsa_state2string(cur_state));
 452         return;
 453 
 454     } else if (!pcmk_is_set(fsa_input_register, R_MEMBERSHIP)) {
 455         crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
 456 
 457         crmd_fsa_stall(TRUE);
 458         return;
 459 
 460     } else if (!pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) {
 461         crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
 462 
 463         crmd_fsa_stall(TRUE);
 464         return;
 465 
 466     } else if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) {
 467         crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
 468 
 469         crmd_fsa_stall(TRUE);
 470         return;
 471 
 472     } else if (!pcmk_is_set(fsa_input_register, R_READ_CONFIG)) {
 473         crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
 474 
 475         crmd_fsa_stall(TRUE);
 476         return;
 477 
 478     } else if (!pcmk_is_set(fsa_input_register, R_PEER_DATA)) {
 479 
 480         crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
 481         crmd_fsa_stall(TRUE);
 482         return;
 483     }
 484 
 485     crm_debug("Init server comms");
 486     ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
 487     if (ipcs == NULL) {
 488         crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
 489         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 490     } else {
 491         crm_notice("Pacemaker controller successfully started and accepting connections");
 492     }
 493     controld_trigger_fencer_connect();
 494 
 495     controld_clear_fsa_input_flags(R_STARTING);
 496     register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
 497 }
 498 
 499 /*       A_RECOVER      */
 500 void
 501 do_recover(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 502            enum crmd_fsa_cause cause,
 503            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 504 {
 505     controld_set_fsa_input_flags(R_IN_RECOVERY);
 506     crm_warn("Fast-tracking shutdown in response to errors");
 507 
 508     register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
 509 }
 510 
 511 static pcmk__cluster_option_t crmd_opts[] = {
 512     /* name, old name, type, allowed values,
 513      * default value, validator,
 514      * short description,
 515      * long description
 516      */
 517     {
 518         "dc-version", NULL, "string", NULL, "none", NULL,
 519         "Pacemaker version on cluster node elected Designated Controller (DC)",
 520         "Includes a hash which identifies the exact changeset the code was "
 521             "built from. Used for diagnostic purposes."
 522     },
 523     {
 524         "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
 525         "The messaging stack on which Pacemaker is currently running",
 526         "Used for informational and diagnostic purposes."
 527     },
 528     {
 529         "cluster-name", NULL, "string", NULL, NULL, NULL,
 530         "An arbitrary name for the cluster",
 531         "This optional value is mostly for users' convenience as desired "
 532             "in administration, but may also be used in Pacemaker "
 533             "configuration rules via the #cluster-name node attribute, and "
 534             "by higher-level tools and resource agents."
 535     },
 536     {
 537         XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
 538         NULL, "20s", pcmk__valid_interval_spec,
 539         "How long to wait for a response from other nodes during start-up",
 540         "The optimal value will depend on the speed and load of your network "
 541             "and the type of switches used."
 542     },
 543     {
 544         XML_CONFIG_ATTR_RECHECK, NULL, "time",
 545         "Zero disables polling, while positive values are an interval in seconds"
 546             "(unless other units are specified, for example \"5min\")",
 547         "15min", pcmk__valid_interval_spec,
 548         "Polling interval to recheck cluster state and evaluate rules "
 549             "with date specifications",
 550         "Pacemaker is primarily event-driven, and looks ahead to know when to "
 551             "recheck cluster state for failure timeouts and most time-based "
 552             "rules. However, it will also recheck the cluster after this "
 553             "amount of inactivity, to evaluate rules with date specifications "
 554             "and serve as a fail-safe for certain types of scheduler bugs."
 555     },
 556     {
 557         "load-threshold", NULL, "percentage", NULL,
 558         "80%", pcmk__valid_utilization,
 559         "Maximum amount of system load that should be used by cluster nodes",
 560         "The cluster will slow down its recovery process when the amount of "
 561             "system resources used (currently CPU) approaches this limit",
 562     },
 563     {
 564         "node-action-limit", NULL, "integer", NULL,
 565         "0", pcmk__valid_number,
 566         "Maximum number of jobs that can be scheduled per node "
 567             "(defaults to 2x cores)"
 568     },
 569     { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
 570         "How a cluster node should react if notified of its own fencing",
 571         "A cluster node may receive notification of its own fencing if fencing "
 572         "is misconfigured, or if fabric fencing is in use that doesn't cut "
 573         "cluster communication. Allowed values are \"stop\" to attempt to "
 574         "immediately stop pacemaker and stay stopped, or \"panic\" to attempt "
 575         "to immediately reboot the local node, falling back to stop on failure."
 576     },
 577     {
 578         XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
 579         "2min", pcmk__valid_interval_spec,
 580         "*** Advanced Use Only ***",
 581         "Declare an election failed if it is not decided within this much "
 582             "time. If you need to adjust this value, it probably indicates "
 583             "the presence of a bug."
 584     },
 585     {
 586         XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
 587         "20min", pcmk__valid_interval_spec,
 588         "*** Advanced Use Only ***",
 589         "Exit immediately if shutdown does not complete within this much "
 590             "time. If you need to adjust this value, it probably indicates "
 591             "the presence of a bug."
 592     },
 593     {
 594         "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
 595         "3min", pcmk__valid_interval_spec,
 596         "*** Advanced Use Only ***",
 597         "If you need to adjust this value, it probably indicates "
 598             "the presence of a bug."
 599     },
 600     {
 601         "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
 602         "30min", pcmk__valid_interval_spec,
 603         "*** Advanced Use Only ***",
 604         "If you need to adjust this value, it probably indicates "
 605             "the presence of a bug."
 606     },
 607     {
 608         "transition-delay", "crmd-transition-delay", "time", NULL,
 609         "0s", pcmk__valid_interval_spec,
 610         "*** Advanced Use Only *** Enabling this option will slow down "
 611             "cluster recovery under all conditions",
 612         "Delay cluster recovery for this much time to allow for additional "
 613             "events to occur. Useful if your configuration is sensitive to "
 614             "the order in which ping updates arrive."
 615     },
 616     {
 617         "stonith-watchdog-timeout", NULL, "time", NULL,
 618         "0", pcmk__valid_sbd_timeout,
 619         "How long to wait before we can assume nodes are safely down "
 620             "when watchdog-based self-fencing via SBD is in use",
 621         "If nonzero, along with `have-watchdog=true` automatically set by the "
 622             "cluster, when fencing is required, watchdog-based self-fencing "
 623             "will be performed via SBD without requiring a fencing resource "
 624             "explicitly configured. "
 625             "If `stonith-watchdog-timeout` is set to a positive value, unseen "
 626             "nodes are assumed to self-fence within this much time. +WARNING:+ "
 627             "It must be ensured that this value is larger than the "
 628             "`SBD_WATCHDOG_TIMEOUT` environment variable on all nodes. "
 629             "Pacemaker verifies the settings individually on all nodes and "
 630             "prevents startup or shuts down if configured wrongly on the fly. "
 631             "It's strongly recommended that `SBD_WATCHDOG_TIMEOUT` is set to "
 632             "the same value on all nodes. "
 633             "If `stonith-watchdog-timeout` is set to a negative value, and "
 634             "`SBD_WATCHDOG_TIMEOUT` is set, twice that value will be used. "
 635             "+WARNING:+ In this case, it's essential (currently not verified by "
 636             "pacemaker) that `SBD_WATCHDOG_TIMEOUT` is set to the same value on "
 637             "all nodes."
 638     },
 639     {
 640         "stonith-max-attempts", NULL, "integer", NULL,
 641         "10", pcmk__valid_positive_number,
 642         "How many times fencing can fail before it will no longer be "
 643             "immediately re-attempted on a target"
 644     },
 645 
 646     // Already documented in libpe_status (other values must be kept identical)
 647     {
 648         "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, demote, suicide",
 649         "stop", pcmk__valid_quorum, NULL, NULL
 650     },
 651     {
 652         XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
 653         "false", pcmk__valid_boolean, NULL, NULL
 654     },
 655 };
 656 
 657 void
 658 crmd_metadata(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 659 {
 660     pcmk__print_option_metadata("pacemaker-controld", "1.0",
 661                                 "Pacemaker controller options",
 662                                 "Cluster options used by Pacemaker's "
 663                                     "controller (formerly called crmd)",
 664                                 crmd_opts, PCMK__NELEM(crmd_opts));
 665 }
 666 
 667 static void
 668 verify_crmd_options(GHashTable * options)
     /* [previous][next][first][last][top][bottom][index][help] */
 669 {
 670     pcmk__validate_cluster_options(options, crmd_opts, PCMK__NELEM(crmd_opts));
 671 }
 672 
 673 static const char *
 674 crmd_pref(GHashTable * options, const char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
 675 {
 676     return pcmk__cluster_option(options, crmd_opts, PCMK__NELEM(crmd_opts),
 677                                 name);
 678 }
 679 
 680 static void
 681 config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 682 {
 683     const char *value = NULL;
 684     GHashTable *config_hash = NULL;
 685     crm_time_t *now = crm_time_new(NULL);
 686     xmlNode *crmconfig = NULL;
 687     xmlNode *alerts = NULL;
 688 
 689     if (rc != pcmk_ok) {
 690         fsa_data_t *msg_data = NULL;
 691 
 692         crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
 693         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 694 
 695         if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
 696             crm_err("The cluster is mis-configured - shutting down and staying down");
 697             controld_set_fsa_input_flags(R_STAYDOWN);
 698         }
 699         goto bail;
 700     }
 701 
 702     crmconfig = output;
 703     if ((crmconfig) &&
 704         (crm_element_name(crmconfig)) &&
 705         (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
 706         crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
 707     }
 708     if (!crmconfig) {
 709         fsa_data_t *msg_data = NULL;
 710 
 711         crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
 712         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 713         goto bail;
 714     }
 715 
 716     crm_debug("Call %d : Parsing CIB options", call_id);
 717     config_hash = pcmk__strkey_table(free, free);
 718     pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
 719                       config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
 720 
 721     verify_crmd_options(config_hash);
 722 
 723     value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
 724     election_trigger->period_ms = crm_parse_interval_spec(value);
 725 
 726     value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */
 727     throttle_update_job_max(value);
 728 
 729     value = crmd_pref(config_hash, "load-threshold");
 730     if(value) {
 731         throttle_set_load_target(strtof(value, NULL) / 100.0);
 732     }
 733 
 734     value = crmd_pref(config_hash, "no-quorum-policy");
 735     if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
 736         no_quorum_suicide_escalation = TRUE;
 737     }
 738 
 739     set_fence_reaction(crmd_pref(config_hash, XML_CONFIG_ATTR_FENCE_REACTION));
 740 
 741     value = crmd_pref(config_hash,"stonith-max-attempts");
 742     update_stonith_max_attempts(value);
 743 
 744     value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
 745     shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
 746     crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums",
 747               shutdown_escalation_timer->period_ms);
 748 
 749     value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
 750     controld_set_election_period(value);
 751 
 752     value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
 753     recheck_interval_ms = crm_parse_interval_spec(value);
 754     crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
 755 
 756     value = crmd_pref(config_hash, "transition-delay");
 757     transition_timer->period_ms = crm_parse_interval_spec(value);
 758 
 759     value = crmd_pref(config_hash, "join-integration-timeout");
 760     integration_timer->period_ms = crm_parse_interval_spec(value);
 761 
 762     value = crmd_pref(config_hash, "join-finalization-timeout");
 763     finalization_timer->period_ms = crm_parse_interval_spec(value);
 764 
 765     value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
 766     controld_shutdown_lock_enabled = crm_is_true(value);
 767 
 768     free(fsa_cluster_name);
 769     fsa_cluster_name = NULL;
 770 
 771     value = g_hash_table_lookup(config_hash, "cluster-name");
 772     if (value) {
 773         fsa_cluster_name = strdup(value);
 774     }
 775 
 776     alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
 777     crmd_unpack_alerts(alerts);
 778 
 779     controld_set_fsa_input_flags(R_READ_CONFIG);
 780     crm_trace("Triggering FSA: %s", __func__);
 781     mainloop_set_trigger(fsa_source);
 782 
 783     g_hash_table_destroy(config_hash);
 784   bail:
 785     crm_time_free(now);
 786 }
 787 
 788 gboolean
 789 crm_read_options(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 790 {
 791     int call_id =
 792         fsa_cib_conn->cmds->query(fsa_cib_conn,
 793             "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
 794             NULL, cib_xpath | cib_scope_local);
 795 
 796     fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
 797     crm_trace("Querying the CIB... call %d", call_id);
 798     return TRUE;
 799 }
 800 
 801 /*       A_READCONFIG   */
 802 void
 803 do_read_config(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 804                enum crmd_fsa_cause cause,
 805                enum crmd_fsa_state cur_state,
 806                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 807 {
 808     throttle_init();
 809     mainloop_set_trigger(config_read);
 810 }
 811 
 812 void
 813 crm_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 814 {
 815     if ((crmd_mainloop == NULL) || !g_main_loop_is_running(crmd_mainloop)) {
 816         crmd_exit(CRM_EX_OK);
 817         return;
 818     }
 819 
 820     if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
 821         crm_err("Escalating shutdown");
 822         register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
 823         return;
 824     }
 825 
 826     controld_set_fsa_input_flags(R_SHUTDOWN);
 827     register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
 828 
 829     if (shutdown_escalation_timer->period_ms == 0) {
 830         const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
 831 
 832         shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
 833     }
 834 
 835     crm_notice("Initiating controller shutdown sequence " CRM_XS
 836                " limit=%ums", shutdown_escalation_timer->period_ms);
 837     controld_start_timer(shutdown_escalation_timer);
 838 }

/* [previous][next][first][last][top][bottom][index][help] */