root/daemons/controld/controld_control.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. do_ha_control
  2. do_shutdown
  3. do_shutdown_req
  4. crmd_fast_exit
  5. crmd_exit
  6. do_exit
  7. sigpipe_ignore
  8. do_startup
  9. accept_controller_client
  10. dispatch_controller_ipc
  11. ipc_client_disconnected
  12. ipc_connection_destroyed
  13. do_stop
  14. do_started
  15. do_recover
  16. crmd_metadata
  17. config_query_callback
  18. controld_trigger_config_as
  19. crm_read_options
  20. do_read_config
  21. crm_shutdown

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <sys/types.h>
  14 #include <sys/stat.h>
  15 
  16 #include <crm/crm.h>
  17 #include <crm/msg_xml.h>
  18 #include <crm/pengine/rules.h>
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include <crm/common/ipc_internal.h>
  22 
  23 #include <pacemaker-controld.h>
  24 
  25 static qb_ipcs_service_t *ipcs = NULL;
  26 
  27 static crm_trigger_t *config_read_trigger = NULL;
  28 
  29 #if SUPPORT_COROSYNC
  30 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
  31 #endif
  32 
  33 void crm_shutdown(int nsig);
  34 static gboolean crm_read_options(gpointer user_data);
  35 
  36 /*       A_HA_CONNECT   */
/*
 * FSA action A_HA_CONNECT / A_HA_DISCONNECT: connect to or disconnect from
 * the cluster layer, depending on which bits are set in \p action.
 */
  37 void
  38 do_ha_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  39               enum crmd_fsa_cause cause,
  40               enum crmd_fsa_state cur_state,
  41               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  42 {
  43     gboolean registered = FALSE;
         /* Created once and reused across later connect/disconnect actions */
  44     static crm_cluster_t *cluster = NULL;
  45 
  46     if (cluster == NULL) {
  47         cluster = pcmk_cluster_new();
  48     }
  49 
  50     if (action & A_HA_DISCONNECT) {
  51         crm_cluster_disconnect(cluster);
  52         crm_info("Disconnected from the cluster");
  53 
  54         controld_set_fsa_input_flags(R_HA_DISCONNECTED);
  55     }
  56 
  57     if (action & A_HA_CONNECT) {
  58         crm_set_status_callback(&peer_update_callback);
             /* Disable automatic reaping of peer cache entries
              * (NOTE(review): presumably the controller manages the peer
              * cache itself -- confirm against crm_set_autoreap())
              */
  59         crm_set_autoreap(FALSE);
  60 
  61 #if SUPPORT_COROSYNC
  62         if (is_corosync_cluster()) {
  63             registered = crm_connect_corosync(cluster);
  64         }
  65 #endif // SUPPORT_COROSYNC
  66 
  67         if (registered) {
  68             controld_election_init(cluster->uname);
                 /* Cache our node name and UUID for the controller's lifetime;
                  * a missing UUID downgrades the connection to a failure
                  */
  69             controld_globals.our_nodename = cluster->uname;
  70             controld_globals.our_uuid = cluster->uuid;
  71             if(cluster->uuid == NULL) {
  72                 crm_err("Could not obtain local uuid");
  73                 registered = FALSE;
  74             }
  75         }
  76 
  77         if (!registered) {
                 /* Failure to connect is treated as a fatal internal error */
  78             controld_set_fsa_input_flags(R_HA_DISCONNECTED);
  79             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
  80             return;
  81         }
  82 
  83         populate_cib_nodes(node_update_none, __func__);
  84         controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
  85         crm_info("Connected to the cluster");
  86     }
  87 
         /* Any action bits other than connect/disconnect are unexpected here */
  88     if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
  89         crm_err("Unexpected action %s in %s", fsa_action2string(action),
  90                 __func__);
  91     }
  92 }
  93 
  94 /*       A_SHUTDOWN     */
/*
 * FSA action A_SHUTDOWN: mark the controller as shutting down and drop the
 * fencer connection.
 */
  95 void
  96 do_shutdown(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  97             enum crmd_fsa_cause cause,
  98             enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  99 {
 100     /* just in case */
 101     controld_set_fsa_input_flags(R_SHUTDOWN);
         /* NOTE(review): FALSE presumably means this is not the final/forced
          * disconnect (crmd_exit() passes TRUE) -- confirm against
          * controld_disconnect_fencer()
          */
 102     controld_disconnect_fencer(FALSE);
 103 }
 104 
 105 /*       A_SHUTDOWN_REQ */
 106 void
 107 do_shutdown_req(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 108                 enum crmd_fsa_cause cause,
 109                 enum crmd_fsa_state cur_state,
 110                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 111 {
 112     xmlNode *msg = NULL;
 113 
 114     controld_set_fsa_input_flags(R_SHUTDOWN);
 115     //controld_set_fsa_input_flags(R_STAYDOWN);
 116     crm_info("Sending shutdown request to all peers (DC is %s)",
 117              pcmk__s(controld_globals.dc_name, "not set"));
 118     msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 119 
 120     if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
 121         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 122     }
 123     free_xml(msg);
 124 }
 125 
/*
 * \brief Exit the controller process immediately
 *
 * The requested exit status may be remapped first: if R_STAYDOWN is set,
 * it becomes CRM_EX_FATAL (inhibiting respawn); if a "successful" exit is
 * requested while R_IN_RECOVERY is still set, it becomes CRM_EX_ERROR.
 *
 * \param[in] exit_code  Requested exit status
 */
 126 void
 127 crmd_fast_exit(crm_exit_t exit_code)
     /* [previous][next][first][last][top][bottom][index][help] */
 128 {
 129     if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
 130         crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
 131                  exit_code, CRM_EX_FATAL);
 132         exit_code = CRM_EX_FATAL;
 133 
 134     } else if ((exit_code == CRM_EX_OK)
 135                && pcmk_is_set(controld_globals.fsa_input_register,
 136                               R_IN_RECOVERY)) {
 137         crm_err("Could not recover from internal error");
 138         exit_code = CRM_EX_ERROR;
 139     }
 140 
         /* Flush and release the logger output object before exiting */
 141     if (controld_globals.logger_out != NULL) {
 142         controld_globals.logger_out->finish(controld_globals.logger_out,
 143                                             exit_code, true, NULL);
 144         pcmk__output_free(controld_globals.logger_out);
 145         controld_globals.logger_out = NULL;
 146     }
 147 
 148     crm_exit(exit_code);
 149 }
 150 
/*
 * \brief Shut the controller down as gracefully as the situation allows
 *
 * On error exit codes this calls crmd_fast_exit() and does not return.
 * Otherwise it tears down IPC, subsystem connections, the FSA message
 * queue, timers, triggers, and cached globals, drains the main loop, and
 * returns the (possibly adjusted) exit status for the caller to use.
 *
 * \param[in] exit_code  Requested exit status
 * \return Exit status to use (only on the graceful path)
 */
 151 crm_exit_t
 152 crmd_exit(crm_exit_t exit_code)
     /* [previous][next][first][last][top][bottom][index][help] */
 153 {
 154     GMainLoop *mloop = controld_globals.mainloop;
 155 
         /* Re-entrancy guard: a second "clean" call is a no-op, while a
          * second call with an error code escalates to an immediate exit
          */
 156     static bool in_progress = FALSE;
 157 
 158     if (in_progress && (exit_code == CRM_EX_OK)) {
 159         crm_debug("Exit is already in progress");
 160         return exit_code;
 161 
 162     } else if(in_progress) {
 163         crm_notice("Error during shutdown process, exiting now with status %d (%s)",
 164                    exit_code, crm_exit_str(exit_code));
 165         crm_write_blackbox(SIGTRAP, NULL);
 166         crmd_fast_exit(exit_code);
 167     }
 168 
 169     in_progress = TRUE;
 170     crm_trace("Preparing to exit with status %d (%s)",
 171               exit_code, crm_exit_str(exit_code));
 172 
 173     /* Suppress secondary errors resulting from us disconnecting everything */
 174     controld_set_fsa_input_flags(R_HA_DISCONNECTED);
 175 
 176 /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
 177 
 178     if(ipcs) {
 179         crm_trace("Closing IPC server");
 180         mainloop_del_ipc_server(ipcs);
 181         ipcs = NULL;
 182     }
 183 
 184     controld_close_attrd_ipc();
 185     controld_shutdown_schedulerd_ipc();
 186     controld_disconnect_fencer(TRUE);
 187 
         /* A clean exit without a main loop is unexpected -- demote to error */
 188     if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
 189         crm_debug("No mainloop detected");
 190         exit_code = CRM_EX_ERROR;
 191     }
 192 
 193     /* On an error, just get out.
 194      *
 195      * Otherwise, make the effort to have mainloop exit gracefully so
 196      * that it (mostly) cleans up after itself and valgrind has less
 197      * to report on - allowing real errors stand out
 198      */
 199     if (exit_code != CRM_EX_OK) {
 200         crm_notice("Forcing immediate exit with status %d (%s)",
 201                    exit_code, crm_exit_str(exit_code));
 202         crm_write_blackbox(SIGTRAP, NULL);
 203         crmd_fast_exit(exit_code);
 204     }
 205 
 206 /* Clean up as much memory as possible for valgrind */
 207 
         /* Log and free every FSA input still queued */
 208     for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
 209          iter = iter->next) {
 210         fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
 211 
 212         crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
 213                  fsa_input2string(fsa_data->fsa_input),
 214                  fsa_state2string(controld_globals.fsa_state),
 215                  fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
 216         delete_fsa_input(fsa_data);
 217     }
 218 
 219     controld_clear_fsa_input_flags(R_MEMBERSHIP);
 220 
 221     g_list_free(controld_globals.fsa_message_queue);
 222     controld_globals.fsa_message_queue = NULL;
 223 
 224     controld_free_node_pending_timers();
 225     controld_election_fini();
 226 
 227     /* Tear down the CIB manager connection, but don't free it yet -- it could
 228      * be used when we drain the mainloop later.
 229      */
 230 
 231     controld_disconnect_cib_manager();
 232 
 233     verify_stopped(controld_globals.fsa_state, LOG_WARNING);
 234     controld_clear_fsa_input_flags(R_LRM_CONNECTED);
 235     lrm_state_destroy_all();
 236 
         /* Release all triggers and timers owned by this file / the FSA */
 237     mainloop_destroy_trigger(config_read_trigger);
 238     config_read_trigger = NULL;
 239 
 240     controld_destroy_fsa_trigger();
 241     controld_destroy_transition_trigger();
 242 
 243     pcmk__client_cleanup();
 244     crm_peer_destroy();
 245 
 246     controld_free_fsa_timers();
 247     te_cleanup_stonith_history_sync(NULL, TRUE);
 248     controld_free_sched_timer();
 249 
         /* Free cached global strings (all heap-allocated) */
 250     free(controld_globals.our_nodename);
 251     controld_globals.our_nodename = NULL;
 252 
 253     free(controld_globals.our_uuid);
 254     controld_globals.our_uuid = NULL;
 255 
 256     free(controld_globals.dc_name);
 257     controld_globals.dc_name = NULL;
 258 
 259     free(controld_globals.dc_version);
 260     controld_globals.dc_version = NULL;
 261 
 262     free(controld_globals.cluster_name);
 263     controld_globals.cluster_name = NULL;
 264 
 265     free(controld_globals.te_uuid);
 266     controld_globals.te_uuid = NULL;
 267 
 268     free_max_generation();
 269     controld_destroy_failed_sync_table();
 270     controld_destroy_outside_events_table();
 271 
 272     mainloop_destroy_signal(SIGPIPE);
 273     mainloop_destroy_signal(SIGUSR1);
 274     mainloop_destroy_signal(SIGTERM);
 275     mainloop_destroy_signal(SIGTRAP);
 276     /* leave SIGCHLD engaged as we might still want to drain some service-actions */
 277 
 278     if (mloop) {
 279         GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
 280 
 281         /* Don't re-enter this block */
 282         controld_globals.mainloop = NULL;
 283 
 284         /* no signals on final draining anymore */
 285         mainloop_destroy_signal(SIGCHLD);
 286 
 287         crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
 288 
             /* Bounded drain: dispatch at most 10 pending iterations */
 289         {
 290             int lpc = 0;
 291 
 292             while((g_main_context_pending(ctx) && lpc < 10)) {
 293                 lpc++;
 294                 crm_trace("Iteration %d", lpc);
 295                 g_main_context_dispatch(ctx);
 296             }
 297         }
 298 
 299         crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
 300         g_main_loop_quit(mloop);
 301 
 302         /* Won't do anything yet, since we're inside it now */
 303         g_main_loop_unref(mloop);
 304     } else {
 305         mainloop_destroy_signal(SIGCHLD);
 306     }
 307 
         /* Now it is safe to free the CIB connection kept alive above */
 308     cib_delete(controld_globals.cib_conn);
 309     controld_globals.cib_conn = NULL;
 310 
 311     throttle_fini();
 312 
 313     /* Graceful */
 314     crm_trace("Done preparing for exit with status %d (%s)",
 315               exit_code, crm_exit_str(exit_code));
 316     return exit_code;
 317 }
 318 
 319 /*       A_EXIT_0, A_EXIT_1     */
 320 void
 321 do_exit(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 322         enum crmd_fsa_cause cause,
 323         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 324 {
 325     crm_exit_t exit_code = CRM_EX_OK;
 326 
 327     if (pcmk_is_set(action, A_EXIT_1)) {
 328         exit_code = CRM_EX_ERROR;
 329         crm_err("Exiting now due to errors");
 330     }
 331     verify_stopped(cur_state, LOG_ERR);
 332     crmd_exit(exit_code);
 333 }
 334 
 335 static void sigpipe_ignore(int nsig) { return; }
     /* [previous][next][first][last][top][bottom][index][help] */
 336 
 337 /*       A_STARTUP      */
/*
 * FSA action A_STARTUP: install signal handlers, create the triggers and
 * FSA timers, and create the CIB and executor connection objects.
 */
 338 void
 339 do_startup(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 340            enum crmd_fsa_cause cause,
 341            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 342 {
 343     crm_debug("Registering Signal Handlers");
 344     mainloop_add_signal(SIGTERM, crm_shutdown);
 345     mainloop_add_signal(SIGPIPE, sigpipe_ignore);
 346 
         /* High-priority trigger that re-reads the configuration on demand
          * (fired via controld_trigger_config_as(); freed in crmd_exit())
          */
 347     config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
 348                                                crm_read_options, NULL);
 349 
 350     controld_init_fsa_trigger();
 351     controld_init_transition_trigger();
 352 
 353     crm_debug("Creating CIB manager and executor objects");
 354     controld_globals.cib_conn = cib_new();
 355 
 356     lrm_state_init_local();
         /* Failing to create the FSA timers is a fatal internal error */
 357     if (controld_init_fsa_timers() == FALSE) {
 358         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 359     }
 360 }
 361 
 362 // \return libqb error code (0 on success, -errno on error)
 363 static int32_t
 364 accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366     crm_trace("Accepting new IPC client connection");
 367     if (pcmk__new_client(c, uid, gid) == NULL) {
 368         return -EIO;
 369     }
 370     return 0;
 371 }
 372 
 373 // \return libqb error code (0 on success, -errno on error)
/*
 * Handle an incoming message on a controller IPC connection: acknowledge
 * it, stamp it with the client's identity, and route it if authorized.
 *
 * \return libqb error code (always 0 here; protocol errors are reported to
 *         the client via the ack's exit status instead)
 */
 374 static int32_t
 375 dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
     /* [previous][next][first][last][top][bottom][index][help] */
 376 {
 377     uint32_t id = 0;
 378     uint32_t flags = 0;
 379     pcmk__client_t *client = pcmk__find_client(c);
 380 
 381     xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
 382 
         /* Unparseable message: tell the client it violated the protocol */
 383     if (msg == NULL) {
 384         pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
 385         return 0;
 386     }
         /* NOTE(review): CRM_EX_INDETERMINATE presumably means "accepted,
          * result not yet known" -- confirm against pcmk__ipc_send_ack()
          */
 387     pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
 388 
 389     CRM_ASSERT(client->user != NULL);
 390     pcmk__update_acl_user(msg, F_CRM_USER, client->user);
 391 
         /* Record the sending client's ID so replies can be routed back */
 392     crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
 393     if (controld_authorize_ipc_message(msg, client, NULL)) {
 394         crm_trace("Processing IPC message from client %s",
 395                   pcmk__client_name(client));
 396         route_message(C_IPC_MESSAGE, msg);
 397     }
 398 
 399     controld_trigger_fsa();
 400     free_xml(msg);
 401     return 0;
 402 }
 403 
 404 static int32_t
 405 ipc_client_disconnected(qb_ipcs_connection_t *c)
     /* [previous][next][first][last][top][bottom][index][help] */
 406 {
 407     pcmk__client_t *client = pcmk__find_client(c);
 408 
 409     if (client) {
 410         crm_trace("Disconnecting %sregistered client %s (%p/%p)",
 411                   (client->userdata? "" : "un"), pcmk__client_name(client),
 412                   c, client);
 413         free(client->userdata);
 414         pcmk__free_client(client);
 415         controld_trigger_fsa();
 416     }
 417     return 0;
 418 }
 419 
 420 static void
 421 ipc_connection_destroyed(qb_ipcs_connection_t *c)
     /* [previous][next][first][last][top][bottom][index][help] */
 422 {
 423     crm_trace("Connection %p", c);
 424     ipc_client_disconnected(c);
 425 }
 426 
 427 /*       A_STOP */
/*
 * FSA action A_STOP: close the controller IPC server and queue I_TERMINATE
 * so the FSA proceeds with shutdown.
 */
 428 void
 429 do_stop(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 430         enum crmd_fsa_cause cause,
 431         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 432 {
 433     crm_trace("Closing IPC server");
 434     mainloop_del_ipc_server(ipcs); ipcs = NULL;
 435     register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
 436 }
 437 
 438 /*       A_STARTED      */
/*
 * FSA action A_STARTED: once all startup prerequisites are in place, start
 * serving IPC, request fencer connectivity, and transition out of starting.
 */
 439 void
 440 do_started(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 441            enum crmd_fsa_cause cause,
 442            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 443 {
         /* Callbacks for the controller IPC server created below */
 444     static struct qb_ipcs_service_handlers crmd_callbacks = {
 445         .connection_accept = accept_controller_client,
 446         .connection_created = NULL,
 447         .msg_process = dispatch_controller_ipc,
 448         .connection_closed = ipc_client_disconnected,
 449         .connection_destroyed = ipc_connection_destroyed
 450     };
 451 
 452     if (cur_state != S_STARTING) {
 453         crm_err("Start cancelled... %s", fsa_state2string(cur_state));
 454         return;
 455 
         /* Stall (re-queue this action) until each prerequisite input flag
          * is set: membership, executor, CIB, configuration, and peer data
          */
 456     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
 457                             R_MEMBERSHIP)) {
 458         crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
 459 
 460         crmd_fsa_stall(TRUE);
 461         return;
 462 
 463     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
 464                             R_LRM_CONNECTED)) {
 465         crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
 466 
 467         crmd_fsa_stall(TRUE);
 468         return;
 469 
 470     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
 471                             R_CIB_CONNECTED)) {
 472         crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
 473 
 474         crmd_fsa_stall(TRUE);
 475         return;
 476 
 477     } else if (!pcmk_is_set(controld_globals.fsa_input_register,
 478                             R_READ_CONFIG)) {
 479         crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
 480 
 481         crmd_fsa_stall(TRUE);
 482         return;
 483 
 484     } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
 485 
 486         crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
 487         crmd_fsa_stall(TRUE);
 488         return;
 489     }
 490 
 491     crm_debug("Init server comms");
 492     ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
 493     if (ipcs == NULL) {
 494         crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
 495         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 496     } else {
 497         crm_notice("Pacemaker controller successfully started and accepting connections");
 498     }
 499     controld_set_fsa_input_flags(R_ST_REQUIRED);
 500     controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
 501 
 502     controld_clear_fsa_input_flags(R_STARTING);
         /* Signal the FSA that startup has completed */
 503     register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
 504 }
 505 
 506 /*       A_RECOVER      */
/*
 * FSA action A_RECOVER: mark the controller as in recovery and fast-track a
 * shutdown by queueing I_TERMINATE.
 */
 507 void
 508 do_recover(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 509            enum crmd_fsa_cause cause,
 510            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 511 {
 512     controld_set_fsa_input_flags(R_IN_RECOVERY);
 513     crm_warn("Fast-tracking shutdown in response to errors");
 514 
 515     register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
 516 }
 517 
 518 static pcmk__cluster_option_t controller_options[] = {
 519     /* name, old name, type, allowed values,
 520      * default value, validator,
 521      * short description,
 522      * long description
 523      */
 524     {
 525         "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
 526         N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
 527         N_("Includes a hash which identifies the exact changeset the code was "
 528             "built from. Used for diagnostic purposes.")
 529     },
 530     {
 531         "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
 532         N_("The messaging stack on which Pacemaker is currently running"),
 533         N_("Used for informational and diagnostic purposes.")
 534     },
 535     {
 536         "cluster-name", NULL, "string", NULL, NULL, NULL,
 537         N_("An arbitrary name for the cluster"),
 538         N_("This optional value is mostly for users' convenience as desired "
 539             "in administration, but may also be used in Pacemaker "
 540             "configuration rules via the #cluster-name node attribute, and "
 541             "by higher-level tools and resource agents.")
 542     },
 543     {
 544         XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
 545         NULL, "20s", pcmk__valid_interval_spec,
 546         N_("How long to wait for a response from other nodes during start-up"),
 547         N_("The optimal value will depend on the speed and load of your network "
 548             "and the type of switches used.")
 549     },
 550     {
 551         XML_CONFIG_ATTR_RECHECK, NULL, "time",
 552         N_("Zero disables polling, while positive values are an interval in seconds"
 553             "(unless other units are specified, for example \"5min\")"),
 554         "15min", pcmk__valid_interval_spec,
 555         N_("Polling interval to recheck cluster state and evaluate rules "
 556             "with date specifications"),
 557         N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
 558             "recheck cluster state for failure timeouts and most time-based "
 559             "rules. However, it will also recheck the cluster after this "
 560             "amount of inactivity, to evaluate rules with date specifications "
 561             "and serve as a fail-safe for certain types of scheduler bugs.")
 562     },
 563     {
 564         "load-threshold", NULL, "percentage", NULL,
 565         "80%", pcmk__valid_percentage,
 566         N_("Maximum amount of system load that should be used by cluster nodes"),
 567         N_("The cluster will slow down its recovery process when the amount of "
 568             "system resources used (currently CPU) approaches this limit"),
 569     },
 570     {
 571         "node-action-limit", NULL, "integer", NULL,
 572         "0", pcmk__valid_number,
 573         N_("Maximum number of jobs that can be scheduled per node "
 574             "(defaults to 2x cores)")
 575     },
 576     { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
 577         N_("How a cluster node should react if notified of its own fencing"),
 578         N_("A cluster node may receive notification of its own fencing if fencing "
 579         "is misconfigured, or if fabric fencing is in use that doesn't cut "
 580         "cluster communication. Allowed values are \"stop\" to attempt to "
 581         "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
 582         "to immediately reboot the local node, falling back to stop on failure.")
 583     },
 584     {
 585         XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
 586         "2min", pcmk__valid_interval_spec,
 587         "*** Advanced Use Only ***",
 588         N_("Declare an election failed if it is not decided within this much "
 589             "time. If you need to adjust this value, it probably indicates "
 590             "the presence of a bug.")
 591     },
 592     {
 593         XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
 594         "20min", pcmk__valid_interval_spec,
 595         "*** Advanced Use Only ***",
 596         N_("Exit immediately if shutdown does not complete within this much "
 597             "time. If you need to adjust this value, it probably indicates "
 598             "the presence of a bug.")
 599     },
 600     {
 601         "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
 602         "3min", pcmk__valid_interval_spec,
 603         "*** Advanced Use Only ***",
 604         N_("If you need to adjust this value, it probably indicates "
 605             "the presence of a bug.")
 606     },
 607     {
 608         "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
 609         "30min", pcmk__valid_interval_spec,
 610         "*** Advanced Use Only ***",
 611         N_("If you need to adjust this value, it probably indicates "
 612             "the presence of a bug.")
 613     },
 614     {
 615         "transition-delay", "crmd-transition-delay", "time", NULL,
 616         "0s", pcmk__valid_interval_spec,
 617         N_("*** Advanced Use Only *** Enabling this option will slow down "
 618             "cluster recovery under all conditions"),
 619         N_("Delay cluster recovery for this much time to allow for additional "
 620             "events to occur. Useful if your configuration is sensitive to "
 621             "the order in which ping updates arrive.")
 622     },
 623     {
 624         "stonith-watchdog-timeout", NULL, "time", NULL,
 625         "0", controld_verify_stonith_watchdog_timeout,
 626         N_("How long before nodes can be assumed to be safely down when "
 627            "watchdog-based self-fencing via SBD is in use"),
 628         N_("If this is set to a positive value, lost nodes are assumed to "
 629            "self-fence using watchdog-based SBD within this much time. This "
 630            "does not require a fencing resource to be explicitly configured, "
 631            "though a fence_watchdog resource can be configured, to limit use "
 632            "to specific nodes. If this is set to 0 (the default), the cluster "
 633            "will never assume watchdog-based self-fencing. If this is set to a "
 634            "negative value, the cluster will use twice the local value of the "
 635            "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
 636            "or otherwise treat this as 0. WARNING: When used, this timeout "
 637            "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
 638            "watchdog-based SBD, and Pacemaker will refuse to start on any of "
 639            "those nodes where this is not true for the local value or SBD is "
 640            "not active. When this is set to a negative value, "
 641            "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
 642            "that use SBD, otherwise data corruption or loss could occur.")
 643     },
 644     {
 645         "stonith-max-attempts", NULL, "integer", NULL,
 646         "10", pcmk__valid_positive_number,
 647         N_("How many times fencing can fail before it will no longer be "
 648             "immediately re-attempted on a target")
 649     },
 650 
 651     // Already documented in libpe_status (other values must be kept identical)
 652     {
 653         "no-quorum-policy", NULL, "select",
 654         "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
 655         N_("What to do when the cluster does not have quorum"), NULL
 656     },
 657     {
 658         XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
 659         "false", pcmk__valid_boolean,
 660         N_("Whether to lock resources to a cleanly shut down node"),
 661         N_("When true, resources active on a node when it is cleanly shut down "
 662             "are kept \"locked\" to that node (not allowed to run elsewhere) "
 663             "until they start again on that node after it rejoins (or for at "
 664             "most shutdown-lock-limit, if set). Stonith resources and "
 665             "Pacemaker Remote connections are never locked. Clone and bundle "
 666             "instances and the promoted role of promotable clones are "
 667             "currently never locked, though support could be added in a future "
 668             "release.")
 669     },
 670     {
 671         XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
 672         "0", pcmk__valid_interval_spec,
 673         N_("Do not lock resources to a cleanly shut down node longer than "
 674            "this"),
 675         N_("If shutdown-lock is true and this is set to a nonzero time "
 676             "duration, shutdown locks will expire after this much time has "
 677             "passed since the shutdown was initiated, even if the node has not "
 678             "rejoined.")
 679     },
 680     {
 681         XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
 682         "0", pcmk__valid_interval_spec,
 683         N_("How long to wait for a node that has joined the cluster to join "
 684            "the controller process group"),
 685         N_("Fence nodes that do not join the controller process group within "
 686            "this much time after joining the cluster, to allow the cluster "
 687            "to continue managing resources. A value of 0 means never fence " 
 688            "pending nodes. Setting the value to 2h means fence nodes after "
 689            "2 hours.")
 690     },
 691 };
 692 
/*
 * Print the controller's cluster-option metadata (generated from
 * controller_options) to stdout.
 */
 693 void
 694 crmd_metadata(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 695 {
 696     const char *desc_short = "Pacemaker controller options";
 697     const char *desc_long = "Cluster options used by Pacemaker's controller";
 698 
 699     gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
 700                                             desc_long, controller_options,
 701                                             PCMK__NELEM(controller_options));
         /* The formatted metadata is heap-allocated and must be freed here */
 702     printf("%s", s);
 703     g_free(s);
 704 }
 705 
/*
 * CIB query callback: parse the crm_config section, validate all controller
 * options (applying defaults), push settings to subcomponents, and set
 * R_READ_CONFIG. On query failure an internal FSA error is registered, and
 * permission/schema errors additionally set R_STAYDOWN.
 */
 706 static void
 707 config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 708 {
 709     const char *value = NULL;
 710     GHashTable *config_hash = NULL;
         /* Current time, used when evaluating date-based rules below */
 711     crm_time_t *now = crm_time_new(NULL);
 712     xmlNode *crmconfig = NULL;
 713     xmlNode *alerts = NULL;
 714 
 715     if (rc != pcmk_ok) {
             /* NOTE(review): msg_data appears to be referenced by the
              * register_fsa_error() macro -- confirm before removing
              */
 716         fsa_data_t *msg_data = NULL;
 717 
 718         crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
 719         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 720 
 721         if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
 722             crm_err("The cluster is mis-configured - shutting down and staying down");
 723             controld_set_fsa_input_flags(R_STAYDOWN);
 724         }
 725         goto bail;
 726     }
 727 
         /* Accept either the crm_config element itself or a parent of it */
 728     crmconfig = output;
 729     if ((crmconfig != NULL)
 730         && !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) {
 731         crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
 732     }
 733     if (!crmconfig) {
 734         fsa_data_t *msg_data = NULL;
 735 
 736         crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
 737         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 738         goto bail;
 739     }
 740 
 741     crm_debug("Call %d : Parsing CIB options", call_id);
 742     config_hash = pcmk__strkey_table(free, free);
 743     pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
 744                       config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
 745 
 746     // Validate all options, and use defaults if not already present in hash
 747     pcmk__validate_cluster_options(config_hash, controller_options,
 748                                    PCMK__NELEM(controller_options));
 749 
         /* no-quorum-policy=suicide is only honored when SBD is available */
 750     value = g_hash_table_lookup(config_hash, "no-quorum-policy");
 751     if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
 752         controld_set_global_flags(controld_no_quorum_suicide);
 753     }
 754 
 755     value = g_hash_table_lookup(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
 756     if (crm_is_true(value)) {
 757         controld_set_global_flags(controld_shutdown_lock_enabled);
 758     } else {
 759         controld_clear_global_flags(controld_shutdown_lock_enabled);
 760     }
 761 
         /* Interval specs parse to milliseconds; globals are kept in seconds */
 762     value = g_hash_table_lookup(config_hash,
 763                                 XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
 764     controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value)
 765                                            / 1000;
 766 
 767     value = g_hash_table_lookup(config_hash,
 768                                 XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
 769     controld_globals.node_pending_timeout = crm_parse_interval_spec(value) / 1000;
 770 
 771     value = g_hash_table_lookup(config_hash, "cluster-name");
 772     pcmk__str_update(&(controld_globals.cluster_name), value);
 773 
 774     // Let subcomponents initialize their own static variables
 775     controld_configure_election(config_hash);
 776     controld_configure_fencing(config_hash);
 777     controld_configure_fsa_timers(config_hash);
 778     controld_configure_throttle(config_hash);
 779 
 780     alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
 781     crmd_unpack_alerts(alerts);
 782 
 783     controld_set_fsa_input_flags(R_READ_CONFIG);
 784     controld_trigger_fsa();
 785 
 786     g_hash_table_destroy(config_hash);
 787   bail:
 788     crm_time_free(now);
 789 }
 790 
 791 /*!
 792  * \internal
 793  * \brief Trigger read and processing of the configuration
 794  *
 795  * \param[in] fn    Calling function name
 796  * \param[in] line  Line number where call occurred
 797  */
 798 void
 799 controld_trigger_config_as(const char *fn, int line)
     /* [previous][next][first][last][top][bottom][index][help] */
 800 {
 801     if (config_read_trigger != NULL) {
 802         crm_trace("%s:%d - Triggered config processing", fn, line);
 803         mainloop_set_trigger(config_read_trigger);
 804     }
 805 }
 806 
 807 gboolean
 808 crm_read_options(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 809 {
 810     cib_t *cib_conn = controld_globals.cib_conn;
 811     int call_id = cib_conn->cmds->query(cib_conn,
 812                                         "//" XML_CIB_TAG_CRMCONFIG
 813                                         " | //" XML_CIB_TAG_ALERTS,
 814                                         NULL, cib_xpath|cib_scope_local);
 815 
 816     fsa_register_cib_callback(call_id, NULL, config_query_callback);
 817     crm_trace("Querying the CIB... call %d", call_id);
 818     return TRUE;
 819 }
 820 
 821 /*       A_READCONFIG   */
 822 void
 823 do_read_config(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 824                enum crmd_fsa_cause cause,
 825                enum crmd_fsa_state cur_state,
 826                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 827 {
 828     throttle_init();
 829     controld_trigger_config();
 830 }
 831 
 832 void
 833 crm_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 834 {
 835     const char *value = NULL;
 836     guint default_period_ms = 0;
 837 
 838     if ((controld_globals.mainloop == NULL)
 839         || !g_main_loop_is_running(controld_globals.mainloop)) {
 840         crmd_exit(CRM_EX_OK);
 841         return;
 842     }
 843 
 844     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 845         crm_err("Escalating shutdown");
 846         register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
 847         return;
 848     }
 849 
 850     controld_set_fsa_input_flags(R_SHUTDOWN);
 851     register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
 852 
 853     /* If shutdown timer doesn't have a period set, use the default
 854      *
 855      * @TODO: Evaluate whether this is still necessary. As long as
 856      * config_query_callback() has been run at least once, it doesn't look like
 857      * anything could have changed the timer period since then.
 858      */
 859     value = pcmk__cluster_option(NULL, controller_options,
 860                                  PCMK__NELEM(controller_options),
 861                                  XML_CONFIG_ATTR_FORCE_QUIT);
 862     default_period_ms = crm_parse_interval_spec(value);
 863     controld_shutdown_start_countdown(default_period_ms);
 864 }

/* [previous][next][first][last][top][bottom][index][help] */