This source file includes the following definitions.
- do_ha_control
- do_shutdown
- do_shutdown_req
- crmd_fast_exit
- crmd_exit
- do_exit
- sigpipe_ignore
- do_startup
- accept_controller_client
- dispatch_controller_ipc
- ipc_client_disconnected
- ipc_connection_destroyed
- do_stop
- do_started
- do_recover
- config_query_callback
- controld_trigger_config_as
- crm_read_options
- do_read_config
- crm_shutdown
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15
16 #include <crm/crm.h>
17 #include <crm/common/xml.h>
18 #include <crm/pengine/rules.h>
19 #include <crm/cluster/internal.h>
20 #include <crm/cluster/election_internal.h>
21 #include <crm/common/ipc_internal.h>
22
23 #include <pacemaker-controld.h>
24
/* IPC server handle for controller clients: assigned in do_started() and
 * released (then reset to NULL) in do_stop() or crmd_exit()
 */
static qb_ipcs_service_t *ipcs = NULL;

/* Mainloop trigger whose callback is crm_read_options(): created in
 * do_startup(), fired via controld_trigger_config_as(), destroyed in
 * crmd_exit()
 */
static crm_trigger_t *config_read_trigger = NULL;

#if SUPPORT_COROSYNC
// Not defined in this file; do_ha_control() calls it to join the cluster
extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);
#endif

// Forward declarations: both are referenced before their definitions below
static void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
35
36
37 void
38 do_ha_control(long long action,
39 enum crmd_fsa_cause cause,
40 enum crmd_fsa_state cur_state,
41 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
42 {
43 gboolean registered = FALSE;
44 static pcmk_cluster_t *cluster = NULL;
45
46 if (cluster == NULL) {
47 cluster = pcmk_cluster_new();
48 }
49
50 if (action & A_HA_DISCONNECT) {
51 pcmk_cluster_disconnect(cluster);
52 crm_info("Disconnected from the cluster");
53
54 controld_set_fsa_input_flags(R_HA_DISCONNECTED);
55 }
56
57 if (action & A_HA_CONNECT) {
58 pcmk__cluster_set_status_callback(&peer_update_callback);
59 pcmk__cluster_set_autoreap(false);
60
61 #if SUPPORT_COROSYNC
62 if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
63 registered = crm_connect_corosync(cluster);
64 }
65 #endif
66
67 if (registered) {
68 controld_election_init(cluster->uname);
69 controld_globals.our_nodename = cluster->uname;
70 controld_globals.our_uuid = cluster->uuid;
71 if(cluster->uuid == NULL) {
72 crm_err("Could not obtain local uuid");
73 registered = FALSE;
74 }
75 }
76
77 if (!registered) {
78 controld_set_fsa_input_flags(R_HA_DISCONNECTED);
79 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
80 return;
81 }
82
83 populate_cib_nodes(node_update_none, __func__);
84 controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
85 crm_info("Connected to the cluster");
86 }
87
88 if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
89 crm_err("Unexpected action %s in %s", fsa_action2string(action),
90 __func__);
91 }
92 }
93
94
95 void
96 do_shutdown(long long action,
97 enum crmd_fsa_cause cause,
98 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
99 {
100
101 controld_set_fsa_input_flags(R_SHUTDOWN);
102 controld_disconnect_fencer(FALSE);
103 }
104
105
106 void
107 do_shutdown_req(long long action,
108 enum crmd_fsa_cause cause,
109 enum crmd_fsa_state cur_state,
110 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
111 {
112 xmlNode *msg = NULL;
113
114 controld_set_fsa_input_flags(R_SHUTDOWN);
115
116 crm_info("Sending shutdown request to all peers (DC is %s)",
117 pcmk__s(controld_globals.dc_name, "not set"));
118 msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
119
120 if (!pcmk__cluster_send_message(NULL, crm_msg_crmd, msg)) {
121 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
122 }
123 free_xml(msg);
124 }
125
126 void
127 crmd_fast_exit(crm_exit_t exit_code)
128 {
129 if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
130 crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
131 exit_code, CRM_EX_FATAL);
132 exit_code = CRM_EX_FATAL;
133
134 } else if ((exit_code == CRM_EX_OK)
135 && pcmk_is_set(controld_globals.fsa_input_register,
136 R_IN_RECOVERY)) {
137 crm_err("Could not recover from internal error");
138 exit_code = CRM_EX_ERROR;
139 }
140
141 if (controld_globals.logger_out != NULL) {
142 controld_globals.logger_out->finish(controld_globals.logger_out,
143 exit_code, true, NULL);
144 pcmk__output_free(controld_globals.logger_out);
145 controld_globals.logger_out = NULL;
146 }
147
148 crm_exit(exit_code);
149 }
150
/*!
 * \internal
 * \brief Tear down controller state in preparation for exiting
 *
 * The first call performs the teardown. A later call with CRM_EX_OK is a
 * no-op, and a later call with any other status goes to crmd_fast_exit().
 * A non-OK status on the first call also goes to crmd_fast_exit(), once the
 * IPC server and the attribute manager, scheduler and fencer connections
 * have been closed.
 *
 * \param[in] exit_code  Requested exit status
 *
 * \return Exit status that was prepared for
 *
 * \note crmd_fast_exit() ends in crm_exit(), so the code following each call
 *       to it is presumably not expected to run (TODO confirm).
 */
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
    // Captured now because controld_globals.mainloop is reset to NULL below
    GMainLoop *mloop = controld_globals.mainloop;

    // Set on the first call so that later calls can tell teardown has begun
    static bool in_progress = FALSE;

    if (in_progress && (exit_code == CRM_EX_OK)) {
        crm_debug("Exit is already in progress");
        return exit_code;

    } else if(in_progress) {
        // An error was reported while teardown was already under way
        crm_notice("Error during shutdown process, exiting now with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code);
    }

    in_progress = TRUE;
    crm_trace("Preparing to exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));

    /* Mark the cluster connection as gone up front (presumably so that errors
     * caused by the disconnections below are not handled as new failures --
     * TODO confirm)
     */
    controld_set_fsa_input_flags(R_HA_DISCONNECTED);

    // Close the IPC server and the connections to other daemons

    if(ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    controld_close_attrd_ipc();
    controld_shutdown_schedulerd_ipc();
    controld_disconnect_fencer(TRUE);

    // Without a mainloop, an OK exit is downgraded to an error exit
    if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
        crm_debug("No mainloop detected");
        exit_code = CRM_EX_ERROR;
    }

    /* On any error, skip the rest of the cleanup: save the blackbox and leave
     * via crmd_fast_exit()
     */
    if (exit_code != CRM_EX_OK) {
        crm_notice("Forcing immediate exit with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code);
    }

    // From here on: release everything still held

    // Log and discard every input still waiting in the FSA message queue
    for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
         iter = iter->next) {
        fsa_data_t *fsa_data = (fsa_data_t *) iter->data;

        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
                 fsa_input2string(fsa_data->fsa_input),
                 fsa_state2string(controld_globals.fsa_state),
                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
        delete_fsa_input(fsa_data);
    }

    controld_clear_fsa_input_flags(R_MEMBERSHIP);

    // The entries were freed in the loop above; this frees the list itself
    g_list_free(controld_globals.fsa_message_queue);
    controld_globals.fsa_message_queue = NULL;

    controld_free_node_pending_timers();
    controld_election_fini();

    /* Disconnect from the CIB manager here; the connection object itself is
     * freed with cib_delete() only after the mainloop has been drained below
     */
    controld_disconnect_cib_manager();

    verify_stopped(controld_globals.fsa_state, LOG_WARNING);
    controld_clear_fsa_input_flags(R_LRM_CONNECTED);
    lrm_state_destroy_all();

    mainloop_destroy_trigger(config_read_trigger);
    config_read_trigger = NULL;

    controld_destroy_fsa_trigger();
    controld_destroy_transition_trigger();

    pcmk__client_cleanup();
    pcmk__cluster_destroy_node_caches();

    controld_free_fsa_timers();
    te_cleanup_stonith_history_sync(NULL, TRUE);
    controld_free_sched_timer();

    // Free the global strings, resetting each pointer to NULL afterwards
    free(controld_globals.our_nodename);
    controld_globals.our_nodename = NULL;

    free(controld_globals.our_uuid);
    controld_globals.our_uuid = NULL;

    free(controld_globals.dc_name);
    controld_globals.dc_name = NULL;

    free(controld_globals.dc_version);
    controld_globals.dc_version = NULL;

    free(controld_globals.cluster_name);
    controld_globals.cluster_name = NULL;

    free(controld_globals.te_uuid);
    controld_globals.te_uuid = NULL;

    free_max_generation();
    controld_destroy_failed_sync_table();
    controld_destroy_outside_events_table();

    mainloop_destroy_signal(SIGPIPE);
    mainloop_destroy_signal(SIGUSR1);
    mainloop_destroy_signal(SIGTERM);
    mainloop_destroy_signal(SIGTRAP);
    // SIGCHLD is handled separately in each branch below

    if (mloop) {
        GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);

        // Clear the global; the local mloop still refers to the loop
        controld_globals.mainloop = NULL;

        // Stop handling SIGCHLD before the final drain
        mainloop_destroy_signal(SIGCHLD);

        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));

        {
            int lpc = 0;

            // Dispatch pending events, giving up after 10 iterations
            while((g_main_context_pending(ctx) && lpc < 10)) {
                lpc++;
                crm_trace("Iteration %d", lpc);
                g_main_context_dispatch(ctx);
            }
        }

        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
        g_main_loop_quit(mloop);

        // Drop this function's reference to the loop
        g_main_loop_unref(mloop);
    } else {
        mainloop_destroy_signal(SIGCHLD);
    }

    // Now that draining is done, the CIB connection object can be freed
    cib_delete(controld_globals.cib_conn);
    controld_globals.cib_conn = NULL;

    throttle_fini();

    // Graceful path: the caller receives the status to exit with
    crm_trace("Done preparing for exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));
    return exit_code;
}
318
319
320 void
321 do_exit(long long action,
322 enum crmd_fsa_cause cause,
323 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
324 {
325 crm_exit_t exit_code = CRM_EX_OK;
326
327 if (pcmk_is_set(action, A_EXIT_1)) {
328 exit_code = CRM_EX_ERROR;
329 crm_err("Exiting now due to errors");
330 }
331 verify_stopped(cur_state, LOG_ERR);
332 crmd_exit(exit_code);
333 }
334
/*!
 * \internal
 * \brief Signal handler that deliberately does nothing
 *
 * do_startup() installs this for SIGPIPE.
 *
 * \param[in] nsig  Signal number (ignored)
 */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
336
337
338 void
339 do_startup(long long action,
340 enum crmd_fsa_cause cause,
341 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
342 {
343 crm_debug("Registering Signal Handlers");
344 mainloop_add_signal(SIGTERM, crm_shutdown);
345 mainloop_add_signal(SIGPIPE, sigpipe_ignore);
346
347 config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
348 crm_read_options, NULL);
349
350 controld_init_fsa_trigger();
351 controld_init_transition_trigger();
352
353 crm_debug("Creating CIB manager and executor objects");
354 controld_globals.cib_conn = cib_new();
355
356 lrm_state_init_local();
357 if (controld_init_fsa_timers() == FALSE) {
358 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
359 }
360 }
361
362
363 static int32_t
364 accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
365 {
366 crm_trace("Accepting new IPC client connection");
367 if (pcmk__new_client(c, uid, gid) == NULL) {
368 return -ENOMEM;
369 }
370 return 0;
371 }
372
373
374 static int32_t
375 dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
376 {
377 uint32_t id = 0;
378 uint32_t flags = 0;
379 pcmk__client_t *client = pcmk__find_client(c);
380
381 xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
382
383 if (msg == NULL) {
384 pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
385 CRM_EX_PROTOCOL);
386 return 0;
387 }
388 pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
389 CRM_EX_INDETERMINATE);
390
391 CRM_ASSERT(client->user != NULL);
392 pcmk__update_acl_user(msg, PCMK__XA_CRM_USER, client->user);
393
394 crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, client->id);
395 if (controld_authorize_ipc_message(msg, client, NULL)) {
396 crm_trace("Processing IPC message from client %s",
397 pcmk__client_name(client));
398 route_message(C_IPC_MESSAGE, msg);
399 }
400
401 controld_trigger_fsa();
402 free_xml(msg);
403 return 0;
404 }
405
406 static int32_t
407 ipc_client_disconnected(qb_ipcs_connection_t *c)
408 {
409 pcmk__client_t *client = pcmk__find_client(c);
410
411 if (client) {
412 crm_trace("Disconnecting %sregistered client %s (%p/%p)",
413 (client->userdata? "" : "un"), pcmk__client_name(client),
414 c, client);
415 free(client->userdata);
416 pcmk__free_client(client);
417 controld_trigger_fsa();
418 }
419 return 0;
420 }
421
422 static void
423 ipc_connection_destroyed(qb_ipcs_connection_t *c)
424 {
425 crm_trace("Connection %p", c);
426 ipc_client_disconnected(c);
427 }
428
429
430 void
431 do_stop(long long action,
432 enum crmd_fsa_cause cause,
433 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
434 {
435 crm_trace("Closing IPC server");
436 mainloop_del_ipc_server(ipcs); ipcs = NULL;
437 register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
438 }
439
440
441 void
442 do_started(long long action,
443 enum crmd_fsa_cause cause,
444 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
445 {
446 static struct qb_ipcs_service_handlers crmd_callbacks = {
447 .connection_accept = accept_controller_client,
448 .connection_created = NULL,
449 .msg_process = dispatch_controller_ipc,
450 .connection_closed = ipc_client_disconnected,
451 .connection_destroyed = ipc_connection_destroyed
452 };
453
454 if (cur_state != S_STARTING) {
455 crm_err("Start cancelled... %s", fsa_state2string(cur_state));
456 return;
457
458 } else if (!pcmk_is_set(controld_globals.fsa_input_register,
459 R_MEMBERSHIP)) {
460 crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
461
462 crmd_fsa_stall(TRUE);
463 return;
464
465 } else if (!pcmk_is_set(controld_globals.fsa_input_register,
466 R_LRM_CONNECTED)) {
467 crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
468
469 crmd_fsa_stall(TRUE);
470 return;
471
472 } else if (!pcmk_is_set(controld_globals.fsa_input_register,
473 R_CIB_CONNECTED)) {
474 crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
475
476 crmd_fsa_stall(TRUE);
477 return;
478
479 } else if (!pcmk_is_set(controld_globals.fsa_input_register,
480 R_READ_CONFIG)) {
481 crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
482
483 crmd_fsa_stall(TRUE);
484 return;
485
486 } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
487
488 crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
489 crmd_fsa_stall(TRUE);
490 return;
491 }
492
493 crm_debug("Init server comms");
494 ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
495 if (ipcs == NULL) {
496 crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
497 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
498 } else {
499 crm_notice("Pacemaker controller successfully started and accepting connections");
500 }
501 controld_set_fsa_input_flags(R_ST_REQUIRED);
502 controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
503
504 controld_clear_fsa_input_flags(R_STARTING);
505 register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
506 }
507
508
509 void
510 do_recover(long long action,
511 enum crmd_fsa_cause cause,
512 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
513 {
514 controld_set_fsa_input_flags(R_IN_RECOVERY);
515 crm_warn("Fast-tracking shutdown in response to errors");
516
517 register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
518 }
519
520 static void
521 config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
522 {
523 const char *value = NULL;
524 GHashTable *config_hash = NULL;
525 crm_time_t *now = crm_time_new(NULL);
526 xmlNode *crmconfig = NULL;
527 xmlNode *alerts = NULL;
528
529 if (rc != pcmk_ok) {
530 fsa_data_t *msg_data = NULL;
531
532 crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
533 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
534
535 if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
536 crm_err("The cluster is mis-configured - shutting down and staying down");
537 controld_set_fsa_input_flags(R_STAYDOWN);
538 }
539 goto bail;
540 }
541
542 crmconfig = output;
543 if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, PCMK_XE_CRM_CONFIG)) {
544 crmconfig = pcmk__xe_first_child(crmconfig, PCMK_XE_CRM_CONFIG, NULL,
545 NULL);
546 }
547 if (!crmconfig) {
548 fsa_data_t *msg_data = NULL;
549
550 crm_err("Local CIB query for " PCMK_XE_CRM_CONFIG " section failed");
551 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
552 goto bail;
553 }
554
555 crm_debug("Call %d : Parsing CIB options", call_id);
556 config_hash = pcmk__strkey_table(free, free);
557 pe_unpack_nvpairs(crmconfig, crmconfig, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
558 config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, now,
559 NULL);
560
561
562 pcmk__validate_cluster_options(config_hash);
563
564
565
566
567
568
569
570
571
572
573
574 value = g_hash_table_lookup(config_hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT);
575 controld_verify_stonith_watchdog_timeout(value);
576
577 value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
578 if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)
579 && (pcmk__locate_sbd() != 0)) {
580 controld_set_global_flags(controld_no_quorum_suicide);
581 }
582
583 value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK);
584 if (crm_is_true(value)) {
585 controld_set_global_flags(controld_shutdown_lock_enabled);
586 } else {
587 controld_clear_global_flags(controld_shutdown_lock_enabled);
588 }
589
590 value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
591 pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit);
592 controld_globals.shutdown_lock_limit /= 1000;
593
594 value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
595 pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout);
596 controld_globals.node_pending_timeout /= 1000;
597
598 value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME);
599 pcmk__str_update(&(controld_globals.cluster_name), value);
600
601
602 controld_configure_election(config_hash);
603 controld_configure_fencing(config_hash);
604 controld_configure_fsa_timers(config_hash);
605 controld_configure_throttle(config_hash);
606
607 alerts = pcmk__xe_first_child(output, PCMK_XE_ALERTS, NULL, NULL);
608 crmd_unpack_alerts(alerts);
609
610 controld_set_fsa_input_flags(R_READ_CONFIG);
611 controld_trigger_fsa();
612
613 g_hash_table_destroy(config_hash);
614 bail:
615 crm_time_free(now);
616 }
617
618
619
620
621
622
623
624
625 void
626 controld_trigger_config_as(const char *fn, int line)
627 {
628 if (config_read_trigger != NULL) {
629 crm_trace("%s:%d - Triggered config processing", fn, line);
630 mainloop_set_trigger(config_read_trigger);
631 }
632 }
633
634 gboolean
635 crm_read_options(gpointer user_data)
636 {
637 cib_t *cib_conn = controld_globals.cib_conn;
638 int call_id = cib_conn->cmds->query(cib_conn,
639 "//" PCMK_XE_CRM_CONFIG
640 " | //" PCMK_XE_ALERTS,
641 NULL, cib_xpath|cib_scope_local);
642
643 fsa_register_cib_callback(call_id, NULL, config_query_callback);
644 crm_trace("Querying the CIB... call %d", call_id);
645 return TRUE;
646 }
647
648
649 void
650 do_read_config(long long action,
651 enum crmd_fsa_cause cause,
652 enum crmd_fsa_state cur_state,
653 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
654 {
655 throttle_init();
656 controld_trigger_config();
657 }
658
659 static void
660 crm_shutdown(int nsig)
661 {
662 const char *value = NULL;
663 guint default_period_ms = 0;
664
665 if ((controld_globals.mainloop == NULL)
666 || !g_main_loop_is_running(controld_globals.mainloop)) {
667 crmd_exit(CRM_EX_OK);
668 return;
669 }
670
671 if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
672 crm_err("Escalating shutdown");
673 register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
674 return;
675 }
676
677 controld_set_fsa_input_flags(R_SHUTDOWN);
678 register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
679
680
681
682
683
684
685
686 value = pcmk__cluster_option(NULL, PCMK_OPT_SHUTDOWN_ESCALATION);
687 pcmk_parse_interval_spec(value, &default_period_ms);
688 controld_shutdown_start_countdown(default_period_ms);
689 }