This source file includes the following definitions.
- do_ha_control
- do_shutdown
- do_shutdown_req
- crmd_fast_exit
- crmd_exit
- do_exit
- sigpipe_ignore
- do_startup
- accept_controller_client
- dispatch_controller_ipc
- ipc_client_disconnected
- ipc_connection_destroyed
- do_stop
- do_started
- do_recover
- crmd_metadata
- controller_option
- config_query_callback
- crm_read_options
- do_read_config
- crm_shutdown
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15
16 #include <crm/crm.h>
17 #include <crm/msg_xml.h>
18 #include <crm/pengine/rules.h>
19 #include <crm/cluster/internal.h>
20 #include <crm/cluster/election_internal.h>
21 #include <crm/common/ipc_internal.h>
22
23 #include <pacemaker-controld.h>
24
/* IPC server handle for controller clients: created in do_started() and
 * released (then reset to NULL) in do_stop() or crmd_exit()
 */
qb_ipcs_service_t *ipcs = NULL;

#if SUPPORT_COROSYNC
/* Defined outside this file; do_ha_control() calls it to join the cluster */
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif

/* Forward declarations for functions defined later in this file */
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);

/* Not read or written by any code in this file */
gboolean fsa_has_quorum = FALSE;

/* Main-loop trigger created in do_startup() with crm_fsa_trigger as callback;
 * set by config_query_callback() once the configuration has been read
 */
crm_trigger_t *fsa_source = NULL;

/* Main-loop trigger created in do_startup() with crm_read_options() as
 * callback; set by do_read_config()
 */
crm_trigger_t *config_read = NULL;

/* Set to TRUE by config_query_callback() when no-quorum-policy is "suicide"
 * and pcmk__locate_sbd() returns nonzero; nothing in this file clears it
 */
bool no_quorum_suicide_escalation = FALSE;

/* Updated by config_query_callback() from the XML_CONFIG_ATTR_SHUTDOWN_LOCK
 * cluster option
 */
bool controld_shutdown_lock_enabled = false;
39
40
41 void
42 do_ha_control(long long action,
43 enum crmd_fsa_cause cause,
44 enum crmd_fsa_state cur_state,
45 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
46 {
47 gboolean registered = FALSE;
48 static crm_cluster_t *cluster = NULL;
49
50 if (cluster == NULL) {
51 cluster = calloc(1, sizeof(crm_cluster_t));
52 }
53
54 if (action & A_HA_DISCONNECT) {
55 crm_cluster_disconnect(cluster);
56 crm_info("Disconnected from the cluster");
57
58 controld_set_fsa_input_flags(R_HA_DISCONNECTED);
59 }
60
61 if (action & A_HA_CONNECT) {
62 crm_set_status_callback(&peer_update_callback);
63 crm_set_autoreap(FALSE);
64
65 if (is_corosync_cluster()) {
66 #if SUPPORT_COROSYNC
67 registered = crm_connect_corosync(cluster);
68 #endif
69 }
70
71 if (registered == TRUE) {
72 controld_election_init(cluster->uname);
73 fsa_our_uname = cluster->uname;
74 fsa_our_uuid = cluster->uuid;
75 if(cluster->uuid == NULL) {
76 crm_err("Could not obtain local uuid");
77 registered = FALSE;
78 }
79 }
80
81 if (registered == FALSE) {
82 controld_set_fsa_input_flags(R_HA_DISCONNECTED);
83 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
84 return;
85 }
86
87 populate_cib_nodes(node_update_none, __func__);
88 controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
89 crm_info("Connected to the cluster");
90 }
91
92 if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
93 crm_err("Unexpected action %s in %s", fsa_action2string(action),
94 __func__);
95 }
96 }
97
98
99 void
100 do_shutdown(long long action,
101 enum crmd_fsa_cause cause,
102 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
103 {
104
105 controld_set_fsa_input_flags(R_SHUTDOWN);
106 controld_disconnect_fencer(FALSE);
107 }
108
109
110 void
111 do_shutdown_req(long long action,
112 enum crmd_fsa_cause cause,
113 enum crmd_fsa_state cur_state,
114 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
115 {
116 xmlNode *msg = NULL;
117
118 controld_set_fsa_input_flags(R_SHUTDOWN);
119
120 crm_info("Sending shutdown request to all peers (DC is %s)",
121 (fsa_our_dc? fsa_our_dc : "not set"));
122 msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
123
124 if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
125 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
126 }
127 free_xml(msg);
128 }
129
/* Globals defined outside this file. Within the code visible here,
 * crmd_exit() frees max_generation_from and max_generation_xml, and
 * crmd_fast_exit() finishes and frees logger_out; resource_history and voted
 * are declared but not referenced.
 */
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern pcmk__output_t *logger_out;
135
136 void
137 crmd_fast_exit(crm_exit_t exit_code)
138 {
139 if (pcmk_is_set(fsa_input_register, R_STAYDOWN)) {
140 crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
141 exit_code, CRM_EX_FATAL);
142 exit_code = CRM_EX_FATAL;
143
144 } else if ((exit_code == CRM_EX_OK)
145 && pcmk_is_set(fsa_input_register, R_IN_RECOVERY)) {
146 crm_err("Could not recover from internal error");
147 exit_code = CRM_EX_ERROR;
148 }
149
150 if (logger_out != NULL) {
151 logger_out->finish(logger_out, exit_code, true, NULL);
152 pcmk__output_free(logger_out);
153 logger_out = NULL;
154 }
155
156 crm_exit(exit_code);
157 }
158
/*!
 * \brief Tear down controller state in preparation for exiting
 *
 * On the first call this closes IPC, disconnects from the other daemons,
 * drops queued FSA inputs, frees global state, and drains and quits the main
 * loop. If \p exit_code is not CRM_EX_OK (or there is no main loop), it
 * hands off to crmd_fast_exit() after the IPC/daemon disconnects instead of
 * doing the full teardown.
 *
 * Re-entry is guarded by a static flag: a second call with CRM_EX_OK returns
 * immediately, while a second call with an error code goes to
 * crmd_fast_exit().
 *
 * \param[in] exit_code  Requested exit status
 *
 * \return \p exit_code as passed in, when the function returns at all
 */
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
    GList *gIter = NULL;

    /* Keep our own reference: crmd_mainloop is set to NULL further down */
    GMainLoop *mloop = crmd_mainloop;

    static bool in_progress = FALSE;

    if (in_progress && (exit_code == CRM_EX_OK)) {
        crm_debug("Exit is already in progress");
        return exit_code;

    } else if(in_progress) {
        crm_notice("Error during shutdown process, exiting now with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);

        /* crmd_fast_exit() ends in crm_exit(); presumably it does not return
         * -- TODO confirm
         */
        crmd_fast_exit(exit_code);
    }

    in_progress = TRUE;
    crm_trace("Preparing to exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));

    /* Mark the cluster layer as disconnected before closing everything */
    controld_set_fsa_input_flags(R_HA_DISCONNECTED);

    /* Close the IPC server and the connections to the other daemons */
    if(ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    controld_close_attrd_ipc();
    controld_shutdown_schedulerd_ipc();
    controld_disconnect_fencer(TRUE);

    /* A clean exit needs a main loop to drain; without one, treat it as an
     * error so the fast-exit branch below is taken
     */
    if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
        crm_debug("No mainloop detected");
        exit_code = CRM_EX_ERROR;
    }

    /* Any non-OK status skips the remaining teardown entirely */
    if (exit_code != CRM_EX_OK) {
        crm_notice("Forcing immediate exit with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code);
    }

    /* Discard every input still queued for the FSA, logging each one */
    for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
        fsa_data_t *fsa_data = gIter->data;

        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
                 fsa_input2string(fsa_data->fsa_input),
                 fsa_state2string(fsa_state),
                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
        delete_fsa_input(fsa_data);
    }

    controld_clear_fsa_input_flags(R_MEMBERSHIP);

    /* The elements were released in the loop above; free the list itself */
    g_list_free(fsa_message_queue); fsa_message_queue = NULL;

    controld_election_fini();

    /* Only disconnect from the CIB manager here; the connection object
     * itself (fsa_cib_conn) is deleted after the main loop has been drained
     */
    controld_disconnect_cib_manager();

    verify_stopped(fsa_state, LOG_WARNING);
    controld_clear_fsa_input_flags(R_LRM_CONNECTED);
    lrm_state_destroy_all();

    /* Destroy the main-loop triggers created in do_startup() */
    mainloop_destroy_trigger(fsa_source); fsa_source = NULL;

    mainloop_destroy_trigger(config_read); config_read = NULL;
    mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;

    pcmk__client_cleanup();
    crm_peer_destroy();

    controld_free_fsa_timers();
    te_cleanup_stonith_history_sync(NULL, TRUE);
    controld_free_sched_timer();

    /* Release global strings, resetting each pointer to NULL afterwards */
    free(fsa_our_dc_version); fsa_our_dc_version = NULL;
    free(fsa_our_uname); fsa_our_uname = NULL;
    free(fsa_our_uuid); fsa_our_uuid = NULL;
    free(fsa_our_dc); fsa_our_dc = NULL;

    free(fsa_cluster_name); fsa_cluster_name = NULL;

    free(te_uuid); te_uuid = NULL;
    free(failed_stop_offset); failed_stop_offset = NULL;
    free(failed_start_offset); failed_start_offset = NULL;

    free(max_generation_from); max_generation_from = NULL;
    free_xml(max_generation_xml); max_generation_xml = NULL;

    /* SIGCHLD is deliberately handled separately, in both branches below */
    mainloop_destroy_signal(SIGPIPE);
    mainloop_destroy_signal(SIGUSR1);
    mainloop_destroy_signal(SIGTERM);
    mainloop_destroy_signal(SIGTRAP);

    if (mloop) {
        GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);

        /* Clear the global so crm_shutdown() sees no main loop from now on */
        crmd_mainloop = NULL;

        mainloop_destroy_signal(SIGCHLD);

        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));

        {
            int lpc = 0;

            /* Dispatch pending sources, but give up after 10 iterations */
            while((g_main_context_pending(ctx) && lpc < 10)) {
                lpc++;
                crm_trace("Iteration %d", lpc);
                g_main_context_dispatch(ctx);
            }
        }

        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
        g_main_loop_quit(mloop);

        /* Drop the reference held via the local copy of the loop pointer */
        g_main_loop_unref(mloop);
    } else {
        mainloop_destroy_signal(SIGCHLD);
    }

    /* Now that draining is done, the CIB connection object can be deleted */
    cib_delete(fsa_cib_conn);
    fsa_cib_conn = NULL;

    throttle_fini();

    crm_trace("Done preparing for exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));
    return exit_code;
}
315
316
317 void
318 do_exit(long long action,
319 enum crmd_fsa_cause cause,
320 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
321 {
322 crm_exit_t exit_code = CRM_EX_OK;
323 int log_level = LOG_INFO;
324 const char *exit_type = "gracefully";
325
326 if (action & A_EXIT_1) {
327 log_level = LOG_ERR;
328 exit_type = "forcefully";
329 exit_code = CRM_EX_ERROR;
330 }
331
332 verify_stopped(cur_state, LOG_ERR);
333 do_crm_log(log_level, "Performing %s - %s exiting the controller",
334 fsa_action2string(action), exit_type);
335
336 crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
337 crmd_exit(exit_code);
338 }
339
/*!
 * \brief Signal handler that intentionally does nothing
 *
 * Registered for SIGPIPE in do_startup().
 *
 * \param[in] nsig  Signal number (ignored)
 */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
341
342
343 void
344 do_startup(long long action,
345 enum crmd_fsa_cause cause,
346 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
347 {
348 crm_debug("Registering Signal Handlers");
349 mainloop_add_signal(SIGTERM, crm_shutdown);
350 mainloop_add_signal(SIGPIPE, sigpipe_ignore);
351
352 fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
353 config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
354 transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
355
356 crm_debug("Creating CIB manager and executor objects");
357 fsa_cib_conn = cib_new();
358
359 lrm_state_init_local();
360 if (controld_init_fsa_timers() == FALSE) {
361 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
362 }
363 }
364
365
366 static int32_t
367 accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
368 {
369 crm_trace("Accepting new IPC client connection");
370 if (pcmk__new_client(c, uid, gid) == NULL) {
371 return -EIO;
372 }
373 return 0;
374 }
375
376
377 static int32_t
378 dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
379 {
380 uint32_t id = 0;
381 uint32_t flags = 0;
382 pcmk__client_t *client = pcmk__find_client(c);
383
384 xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
385
386 if (msg == NULL) {
387 pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
388 return 0;
389 }
390 pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
391
392 CRM_ASSERT(client->user != NULL);
393 pcmk__update_acl_user(msg, F_CRM_USER, client->user);
394
395 crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
396 if (controld_authorize_ipc_message(msg, client, NULL)) {
397 crm_trace("Processing IPC message from client %s",
398 pcmk__client_name(client));
399 route_message(C_IPC_MESSAGE, msg);
400 }
401
402 trigger_fsa();
403 free_xml(msg);
404 return 0;
405 }
406
407 static int32_t
408 ipc_client_disconnected(qb_ipcs_connection_t *c)
409 {
410 pcmk__client_t *client = pcmk__find_client(c);
411
412 if (client) {
413 crm_trace("Disconnecting %sregistered client %s (%p/%p)",
414 (client->userdata? "" : "un"), pcmk__client_name(client),
415 c, client);
416 free(client->userdata);
417 pcmk__free_client(client);
418 trigger_fsa();
419 }
420 return 0;
421 }
422
423 static void
424 ipc_connection_destroyed(qb_ipcs_connection_t *c)
425 {
426 crm_trace("Connection %p", c);
427 ipc_client_disconnected(c);
428 }
429
430
431 void
432 do_stop(long long action,
433 enum crmd_fsa_cause cause,
434 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
435 {
436 crm_trace("Closing IPC server");
437 mainloop_del_ipc_server(ipcs); ipcs = NULL;
438 register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
439 }
440
441
442 void
443 do_started(long long action,
444 enum crmd_fsa_cause cause,
445 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
446 {
447 static struct qb_ipcs_service_handlers crmd_callbacks = {
448 .connection_accept = accept_controller_client,
449 .connection_created = NULL,
450 .msg_process = dispatch_controller_ipc,
451 .connection_closed = ipc_client_disconnected,
452 .connection_destroyed = ipc_connection_destroyed
453 };
454
455 if (cur_state != S_STARTING) {
456 crm_err("Start cancelled... %s", fsa_state2string(cur_state));
457 return;
458
459 } else if (!pcmk_is_set(fsa_input_register, R_MEMBERSHIP)) {
460 crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
461
462 crmd_fsa_stall(TRUE);
463 return;
464
465 } else if (!pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) {
466 crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
467
468 crmd_fsa_stall(TRUE);
469 return;
470
471 } else if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) {
472 crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
473
474 crmd_fsa_stall(TRUE);
475 return;
476
477 } else if (!pcmk_is_set(fsa_input_register, R_READ_CONFIG)) {
478 crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
479
480 crmd_fsa_stall(TRUE);
481 return;
482
483 } else if (!pcmk_is_set(fsa_input_register, R_PEER_DATA)) {
484
485 crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
486 crmd_fsa_stall(TRUE);
487 return;
488 }
489
490 crm_debug("Init server comms");
491 ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
492 if (ipcs == NULL) {
493 crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
494 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
495 } else {
496 crm_notice("Pacemaker controller successfully started and accepting connections");
497 }
498 controld_trigger_fencer_connect();
499
500 controld_clear_fsa_input_flags(R_STARTING);
501 register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
502 }
503
504
505 void
506 do_recover(long long action,
507 enum crmd_fsa_cause cause,
508 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
509 {
510 controld_set_fsa_input_flags(R_IN_RECOVERY);
511 crm_warn("Fast-tracking shutdown in response to errors");
512
513 register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
514 }
515
516 static pcmk__cluster_option_t controller_options[] = {
517
518
519
520
521
522 {
523 "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
524 N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
525 N_("Includes a hash which identifies the exact changeset the code was "
526 "built from. Used for diagnostic purposes.")
527 },
528 {
529 "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
530 N_("The messaging stack on which Pacemaker is currently running"),
531 N_("Used for informational and diagnostic purposes.")
532 },
533 {
534 "cluster-name", NULL, "string", NULL, NULL, NULL,
535 N_("An arbitrary name for the cluster"),
536 N_("This optional value is mostly for users' convenience as desired "
537 "in administration, but may also be used in Pacemaker "
538 "configuration rules via the #cluster-name node attribute, and "
539 "by higher-level tools and resource agents.")
540 },
541 {
542 XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
543 NULL, "20s", pcmk__valid_interval_spec,
544 N_("How long to wait for a response from other nodes during start-up"),
545 N_("The optimal value will depend on the speed and load of your network "
546 "and the type of switches used.")
547 },
548 {
549 XML_CONFIG_ATTR_RECHECK, NULL, "time",
550 N_("Zero disables polling, while positive values are an interval in seconds"
551 "(unless other units are specified, for example \"5min\")"),
552 "15min", pcmk__valid_interval_spec,
553 N_("Polling interval to recheck cluster state and evaluate rules "
554 "with date specifications"),
555 N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
556 "recheck cluster state for failure timeouts and most time-based "
557 "rules. However, it will also recheck the cluster after this "
558 "amount of inactivity, to evaluate rules with date specifications "
559 "and serve as a fail-safe for certain types of scheduler bugs.")
560 },
561 {
562 "load-threshold", NULL, "percentage", NULL,
563 "80%", pcmk__valid_percentage,
564 N_("Maximum amount of system load that should be used by cluster nodes"),
565 N_("The cluster will slow down its recovery process when the amount of "
566 "system resources used (currently CPU) approaches this limit"),
567 },
568 {
569 "node-action-limit", NULL, "integer", NULL,
570 "0", pcmk__valid_number,
571 N_("Maximum number of jobs that can be scheduled per node "
572 "(defaults to 2x cores)")
573 },
574 { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
575 N_("How a cluster node should react if notified of its own fencing"),
576 N_("A cluster node may receive notification of its own fencing if fencing "
577 "is misconfigured, or if fabric fencing is in use that doesn't cut "
578 "cluster communication. Allowed values are \"stop\" to attempt to "
579 "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
580 "to immediately reboot the local node, falling back to stop on failure.")
581 },
582 {
583 XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
584 "2min", pcmk__valid_interval_spec,
585 "*** Advanced Use Only ***",
586 N_("Declare an election failed if it is not decided within this much "
587 "time. If you need to adjust this value, it probably indicates "
588 "the presence of a bug.")
589 },
590 {
591 XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
592 "20min", pcmk__valid_interval_spec,
593 "*** Advanced Use Only ***",
594 N_("Exit immediately if shutdown does not complete within this much "
595 "time. If you need to adjust this value, it probably indicates "
596 "the presence of a bug.")
597 },
598 {
599 "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
600 "3min", pcmk__valid_interval_spec,
601 "*** Advanced Use Only ***",
602 N_("If you need to adjust this value, it probably indicates "
603 "the presence of a bug.")
604 },
605 {
606 "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
607 "30min", pcmk__valid_interval_spec,
608 "*** Advanced Use Only ***",
609 N_("If you need to adjust this value, it probably indicates "
610 "the presence of a bug.")
611 },
612 {
613 "transition-delay", "crmd-transition-delay", "time", NULL,
614 "0s", pcmk__valid_interval_spec,
615 N_("*** Advanced Use Only *** Enabling this option will slow down "
616 "cluster recovery under all conditions"),
617 N_("Delay cluster recovery for this much time to allow for additional "
618 "events to occur. Useful if your configuration is sensitive to "
619 "the order in which ping updates arrive.")
620 },
621 {
622 "stonith-watchdog-timeout", NULL, "time", NULL,
623 "0", controld_verify_stonith_watchdog_timeout,
624 N_("How long before nodes can be assumed to be safely down when "
625 "watchdog-based self-fencing via SBD is in use"),
626 N_("If this is set to a positive value, lost nodes are assumed to "
627 "self-fence using watchdog-based SBD within this much time. This "
628 "does not require a fencing resource to be explicitly configured, "
629 "though a fence_watchdog resource can be configured, to limit use "
630 "to specific nodes. If this is set to 0 (the default), the cluster "
631 "will never assume watchdog-based self-fencing. If this is set to a "
632 "negative value, the cluster will use twice the local value of the "
633 "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
634 "or otherwise treat this as 0. WARNING: When used, this timeout "
635 "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
636 "watchdog-based SBD, and Pacemaker will refuse to start on any of "
637 "those nodes where this is not true for the local value or SBD is "
638 "not active. When this is set to a negative value, "
639 "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
640 "that use SBD, otherwise data corruption or loss could occur.")
641 },
642 {
643 "stonith-max-attempts", NULL, "integer", NULL,
644 "10", pcmk__valid_positive_number,
645 N_("How many times fencing can fail before it will no longer be "
646 "immediately re-attempted on a target")
647 },
648
649
650 {
651 "no-quorum-policy", NULL, "select",
652 "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
653 "What to do when the cluster does not have quorum", NULL
654 },
655 {
656 XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
657 "false", pcmk__valid_boolean,
658 "Whether to lock resources to a cleanly shut down node",
659 "When true, resources active on a node when it is cleanly shut down "
660 "are kept \"locked\" to that node (not allowed to run elsewhere) "
661 "until they start again on that node after it rejoins (or for at "
662 "most shutdown-lock-limit, if set). Stonith resources and "
663 "Pacemaker Remote connections are never locked. Clone and bundle "
664 "instances and the promoted role of promotable clones are currently"
665 " never locked, though support could be added in a future release."
666 },
667 };
668
669 void
670 crmd_metadata(void)
671 {
672 const char *desc_short = "Pacemaker controller options";
673 const char *desc_long = "Cluster options used by Pacemaker's controller";
674
675 gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
676 desc_long, controller_options,
677 PCMK__NELEM(controller_options));
678 printf("%s", s);
679 g_free(s);
680 }
681
682 static const char *
683 controller_option(GHashTable *options, const char *name)
684 {
685 return pcmk__cluster_option(options, controller_options,
686 PCMK__NELEM(controller_options), name);
687 }
688
689 static void
690 config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
691 {
692 const char *value = NULL;
693 GHashTable *config_hash = NULL;
694 crm_time_t *now = crm_time_new(NULL);
695 xmlNode *crmconfig = NULL;
696 xmlNode *alerts = NULL;
697
698 if (rc != pcmk_ok) {
699 fsa_data_t *msg_data = NULL;
700
701 crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
702 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
703
704 if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
705 crm_err("The cluster is mis-configured - shutting down and staying down");
706 controld_set_fsa_input_flags(R_STAYDOWN);
707 }
708 goto bail;
709 }
710
711 crmconfig = output;
712 if ((crmconfig) &&
713 (crm_element_name(crmconfig)) &&
714 (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
715 crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
716 }
717 if (!crmconfig) {
718 fsa_data_t *msg_data = NULL;
719
720 crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
721 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
722 goto bail;
723 }
724
725 crm_debug("Call %d : Parsing CIB options", call_id);
726 config_hash = pcmk__strkey_table(free, free);
727 pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
728 config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
729
730 pcmk__validate_cluster_options(config_hash, controller_options,
731 PCMK__NELEM(controller_options));
732
733 value = controller_option(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
734 election_trigger->period_ms = crm_parse_interval_spec(value);
735
736 value = controller_option(config_hash, "node-action-limit");
737 throttle_update_job_max(value);
738
739 value = controller_option(config_hash, "load-threshold");
740 if(value) {
741 throttle_set_load_target(strtof(value, NULL) / 100.0);
742 }
743
744 value = controller_option(config_hash, "no-quorum-policy");
745 if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
746 no_quorum_suicide_escalation = TRUE;
747 }
748
749 set_fence_reaction(controller_option(config_hash,
750 XML_CONFIG_ATTR_FENCE_REACTION));
751
752 value = controller_option(config_hash, "stonith-max-attempts");
753 update_stonith_max_attempts(value);
754
755 value = controller_option(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
756 shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
757 crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums",
758 shutdown_escalation_timer->period_ms);
759
760 value = controller_option(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
761 controld_set_election_period(value);
762
763 value = controller_option(config_hash, XML_CONFIG_ATTR_RECHECK);
764 recheck_interval_ms = crm_parse_interval_spec(value);
765 crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
766
767 value = controller_option(config_hash, "transition-delay");
768 transition_timer->period_ms = crm_parse_interval_spec(value);
769
770 value = controller_option(config_hash, "join-integration-timeout");
771 integration_timer->period_ms = crm_parse_interval_spec(value);
772
773 value = controller_option(config_hash, "join-finalization-timeout");
774 finalization_timer->period_ms = crm_parse_interval_spec(value);
775
776 value = controller_option(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
777 controld_shutdown_lock_enabled = crm_is_true(value);
778
779 free(fsa_cluster_name);
780 fsa_cluster_name = NULL;
781
782 value = g_hash_table_lookup(config_hash, "cluster-name");
783 if (value) {
784 fsa_cluster_name = strdup(value);
785 }
786
787 alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
788 crmd_unpack_alerts(alerts);
789
790 controld_set_fsa_input_flags(R_READ_CONFIG);
791 crm_trace("Triggering FSA: %s", __func__);
792 mainloop_set_trigger(fsa_source);
793
794 g_hash_table_destroy(config_hash);
795 bail:
796 crm_time_free(now);
797 }
798
799 gboolean
800 crm_read_options(gpointer user_data)
801 {
802 int call_id =
803 fsa_cib_conn->cmds->query(fsa_cib_conn,
804 "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
805 NULL, cib_xpath | cib_scope_local);
806
807 fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
808 crm_trace("Querying the CIB... call %d", call_id);
809 return TRUE;
810 }
811
812
813 void
814 do_read_config(long long action,
815 enum crmd_fsa_cause cause,
816 enum crmd_fsa_state cur_state,
817 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
818 {
819 throttle_init();
820 mainloop_set_trigger(config_read);
821 }
822
823 void
824 crm_shutdown(int nsig)
825 {
826 if ((crmd_mainloop == NULL) || !g_main_loop_is_running(crmd_mainloop)) {
827 crmd_exit(CRM_EX_OK);
828 return;
829 }
830
831 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
832 crm_err("Escalating shutdown");
833 register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
834 return;
835 }
836
837 controld_set_fsa_input_flags(R_SHUTDOWN);
838 register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
839
840 if (shutdown_escalation_timer->period_ms == 0) {
841 const char *value = controller_option(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
842
843 shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
844 }
845
846 crm_notice("Initiating controller shutdown sequence " CRM_XS
847 " limit=%ums", shutdown_escalation_timer->period_ms);
848 controld_start_timer(shutdown_escalation_timer);
849 }