This source file includes the following definitions.
- election_timeout_popped
- do_ha_control
- need_spawn_pengine_from_crmd
- do_shutdown
- do_shutdown_req
- log_connected_client
- crmd_fast_exit
- crmd_exit
- do_exit
- sigpipe_ignore
- do_startup
- crmd_ipc_accept
- crmd_ipc_created
- crmd_ipc_dispatch
- crmd_ipc_closed
- crmd_ipc_destroy
- do_stop
- do_started
- do_recover
- crmd_metadata
- verify_crmd_options
- crmd_pref
- config_query_callback
- crm_read_options
- do_read_config
- crm_shutdown
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 #include <crm_internal.h>
20
21 #include <sys/param.h>
22
23 #include <crm/crm.h>
24
25 #include <crm/msg_xml.h>
26
27 #include <crm/pengine/rules.h>
28 #include <crm/cluster/internal.h>
29 #include <crm/cluster/election.h>
30 #include <crm/common/ipcs.h>
31
32 #include <crmd.h>
33 #include <crmd_fsa.h>
34 #include <fsa_proto.h>
35 #include <crmd_messages.h>
36 #include <crmd_callbacks.h>
37 #include <crmd_lrm.h>
38 #include <crmd_alerts.h>
39 #include <crmd_metadata.h>
40 #include <tengine.h>
41 #include <throttle.h>
42
43 #include <sys/types.h>
44 #include <sys/stat.h>
45
46 #ifdef RHEL7_COMPAT
47
48 #include <crm/pengine/rules_internal.h>
49 #endif
50
/* IPC server handle: assigned in do_started() from crmd_ipc_server_init(),
 * released (and reset to NULL) in do_stop() and crmd_exit().
 */
qb_ipcs_service_t *ipcs = NULL;

/* Implemented outside this file; used by do_ha_control() */
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
extern void crmd_ha_connection_destroy(gpointer user_data);

/* Forward declarations: both are registered as callbacks in do_startup()
 * before their definitions appear further down in this file.
 */
void crm_shutdown(int nsig);
gboolean crm_read_options(gpointer user_data);

/* Not modified anywhere in this file */
gboolean fsa_has_quorum = FALSE;

/* Mainloop triggers created in do_startup() (callbacks crm_fsa_trigger and
 * crm_read_options respectively) and destroyed in crmd_exit().
 */
crm_trigger_t *fsa_source = NULL;
crm_trigger_t *config_read = NULL;

/* Set to TRUE by config_query_callback() when no-quorum-policy is "suicide"
 * and pcmk_locate_sbd() reports success; never reset in this file.
 */
bool no_quorum_suicide_escalation = FALSE;
63
64 static gboolean
65 election_timeout_popped(gpointer data)
66 {
67
68 crm_info("Election failed: Declaring ourselves the winner");
69 register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL);
70 return FALSE;
71 }
72
73
74 void
75 do_ha_control(long long action,
76 enum crmd_fsa_cause cause,
77 enum crmd_fsa_state cur_state,
78 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
79 {
80 gboolean registered = FALSE;
81 static crm_cluster_t *cluster = NULL;
82
83 if (cluster == NULL) {
84 cluster = calloc(1, sizeof(crm_cluster_t));
85 }
86
87 if (action & A_HA_DISCONNECT) {
88 crm_cluster_disconnect(cluster);
89 crm_info("Disconnected from the cluster");
90
91 set_bit(fsa_input_register, R_HA_DISCONNECTED);
92 }
93
94 if (action & A_HA_CONNECT) {
95 crm_set_status_callback(&peer_update_callback);
96 crm_set_autoreap(FALSE);
97
98 if (is_openais_cluster()) {
99 #if SUPPORT_COROSYNC
100 registered = crm_connect_corosync(cluster);
101 #endif
102 } else if (is_heartbeat_cluster()) {
103 #if SUPPORT_HEARTBEAT
104 cluster->destroy = crmd_ha_connection_destroy;
105 cluster->hb_dispatch = crmd_ha_msg_callback;
106
107 registered = crm_cluster_connect(cluster);
108 fsa_cluster_conn = cluster->hb_conn;
109
110 crm_trace("Be informed of Node Status changes");
111 if (registered &&
112 fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn,
113 crmd_ha_status_callback,
114 fsa_cluster_conn) != HA_OK) {
115
116 crm_err("Cannot set nstatus callback: %s",
117 fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn));
118 registered = FALSE;
119 }
120
121 crm_trace("Be informed of CRM Client Status changes");
122 if (registered &&
123 fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn,
124 crmd_client_status_callback,
125 fsa_cluster_conn) != HA_OK) {
126
127 crm_err("Cannot set cstatus callback: %s",
128 fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn));
129 registered = FALSE;
130 }
131
132 if (registered) {
133 crm_trace("Requesting an initial dump of CRMD client_status");
134 fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD,
135 -1);
136 }
137 #endif
138 }
139 fsa_election = election_init(NULL, cluster->uname, 60000, election_timeout_popped);
140 fsa_our_uname = cluster->uname;
141 fsa_our_uuid = cluster->uuid;
142 if(cluster->uuid == NULL) {
143 crm_err("Could not obtain local uuid");
144 registered = FALSE;
145 }
146
147 if (registered == FALSE) {
148 set_bit(fsa_input_register, R_HA_DISCONNECTED);
149 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
150 return;
151 }
152
153 populate_cib_nodes(node_update_none, __FUNCTION__);
154 clear_bit(fsa_input_register, R_HA_DISCONNECTED);
155 crm_info("Connected to the cluster");
156 }
157
158 if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
159 crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
160 }
161 }
162
163 static bool
164 need_spawn_pengine_from_crmd(void)
165 {
166 static int result = -1;
167
168 if (result != -1)
169 return result;
170 if (!is_heartbeat_cluster()) {
171 result = 0;
172 return result;
173 }
174
175
176 result = TRUE;
177 crm_str_to_boolean(daemon_option("crmd_spawns_pengine"), &result);
178 return result;
179 }
180
181
182 void
183 do_shutdown(long long action,
184 enum crmd_fsa_cause cause,
185 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
186 {
187
188 set_bit(fsa_input_register, R_SHUTDOWN);
189
190 if (need_spawn_pengine_from_crmd()) {
191 if (is_set(fsa_input_register, pe_subsystem->flag_connected)) {
192 crm_info("Terminating the %s", pe_subsystem->name);
193 if (stop_subsystem(pe_subsystem, TRUE) == FALSE) {
194
195 crm_err("Faking %s exit", pe_subsystem->name);
196 clear_bit(fsa_input_register, pe_subsystem->flag_connected);
197 } else {
198 crm_info("Waiting for subsystems to exit");
199 crmd_fsa_stall(FALSE);
200 }
201 }
202 crm_info("All subsystems stopped, continuing");
203 }
204
205 if (stonith_api) {
206
207 clear_bit(fsa_input_register, R_ST_REQUIRED);
208
209 crm_info("Disconnecting STONITH...");
210 stonith_api->cmds->disconnect(stonith_api);
211 }
212 }
213
214
215 void
216 do_shutdown_req(long long action,
217 enum crmd_fsa_cause cause,
218 enum crmd_fsa_state cur_state,
219 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
220 {
221 xmlNode *msg = NULL;
222
223 set_bit(fsa_input_register, R_SHUTDOWN);
224 crm_info("Sending shutdown request to all peers (DC is %s)",
225 (fsa_our_dc? fsa_our_dc : "not set"));
226 msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
227
228
229 if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
230 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
231 }
232 free_xml(msg);
233 }
234
/* State defined in other translation units.  crmd_exit() releases
 * attrd_ipc, max_generation_from, max_generation_xml and te_client_id;
 * resource_history and voted are declared here but not referenced in the
 * visible part of this file.
 */
extern crm_ipc_t *attrd_ipc;
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern char *te_client_id;
241
242 void log_connected_client(gpointer key, gpointer value, gpointer user_data);
243
244 void
245 log_connected_client(gpointer key, gpointer value, gpointer user_data)
246 {
247 crm_client_t *client = value;
248
249 crm_err("%s is still connected at exit", crm_client_name(client));
250 }
251
252 int
253 crmd_fast_exit(int rc)
254 {
255 if (is_set(fsa_input_register, R_STAYDOWN)) {
256 crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
257 rc, DAEMON_RESPAWN_STOP);
258 rc = DAEMON_RESPAWN_STOP;
259 }
260
261 if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) {
262 crm_err("Could not recover from internal error");
263 rc = pcmk_err_generic;
264 }
265 return crm_exit(rc);
266 }
267
/*
 * Tear down the daemon's state and stop the main loop.
 *
 * rc: requested exit status.  A non-zero status (or the absence of a main
 *     loop) short-circuits into crmd_fast_exit() after the IPC, attrd,
 *     policy engine and fencing connections have been closed.
 *
 * Returns rc when the orderly path completes, or when a second call arrives
 * with rc == 0 while an exit is already in progress.
 *
 * The statement order below is significant: objects are released only after
 * the code that still needs them has run.
 */
int
crmd_exit(int rc)
{
    GListPtr gIter = NULL;
    GMainLoop *mloop = crmd_mainloop;

    /* Re-entrancy guard: set on the first call and never cleared */
    static bool in_progress = FALSE;

    if(in_progress && rc == 0) {
        crm_debug("Exit is already in progress");
        return rc;

    } else if(in_progress) {
        /* A second call with an error status: give up on orderly teardown.
         * NOTE(review): execution would continue below if crmd_fast_exit()
         * ever returned - presumably crm_exit() does not return; confirm.
         */
        crm_notice("Error during shutdown process, terminating now with status %d: %s",
                   rc, pcmk_strerror(rc));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(rc);
    }

    in_progress = TRUE;
    crm_trace("Preparing to exit: %d", rc);

    set_bit(fsa_input_register, R_HA_DISCONNECTED);

    /* Close connections first; these steps run on both the fast and the
     * orderly path.
     */
    if(ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    if (attrd_ipc) {
        crm_trace("Closing attrd connection");
        crm_ipc_close(attrd_ipc);
        crm_ipc_destroy(attrd_ipc);
        attrd_ipc = NULL;
    }

    if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) {
        crm_trace("Disconnecting Policy Engine");
        qb_ipcs_disconnect(pe_subsystem->client->ipcs);
    }

    if(stonith_api) {
        crm_trace("Disconnecting fencing API");
        clear_bit(fsa_input_register, R_ST_REQUIRED);
        stonith_api->cmds->free(stonith_api); stonith_api = NULL;
    }

    /* Without a main loop the orderly path cannot proceed: force the
     * error path below by switching to EPROTO.
     */
    if (rc == pcmk_ok && crmd_mainloop == NULL) {
        crm_debug("No mainloop detected");
        rc = EPROTO;
    }

    if(rc != pcmk_ok) {
        crm_notice("Forcing immediate exit with status %d: %s",
                   rc, pcmk_strerror(rc));
        crm_write_blackbox(SIGTRAP, NULL);
        return crmd_fast_exit(rc);
    }

    /* Orderly path from here on: discard every queued FSA input ... */
    for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
        fsa_data_t *fsa_data = gIter->data;

        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
                 fsa_input2string(fsa_data->fsa_input),
                 fsa_state2string(fsa_state),
                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
        delete_fsa_input(fsa_data);
    }

    /* ... then free the list cells themselves */
    clear_bit(fsa_input_register, R_MEMBERSHIP);
    g_list_free(fsa_message_queue); fsa_message_queue = NULL;

    /* Subsystem descriptors allocated in do_startup() */
    free(pe_subsystem); pe_subsystem = NULL;
    free(te_subsystem); te_subsystem = NULL;
    free(cib_subsystem); cib_subsystem = NULL;

    metadata_cache_fini();

    election_fini(fsa_election);
    fsa_election = NULL;

    /* Sign off from the CIB; the connection object itself is deleted at the
     * very end of this function.
     */
    cib_free_callbacks(fsa_cib_conn);
    fsa_cib_conn->cmds->signoff(fsa_cib_conn);

    verify_stopped(fsa_state, LOG_WARNING);
    clear_bit(fsa_input_register, R_LRM_CONNECTED);
    lrm_state_destroy_all();

    /* Mainloop triggers */
    mainloop_destroy_trigger(fsa_source); fsa_source = NULL;

    mainloop_destroy_trigger(config_read); config_read = NULL;
    mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL;
    mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;

    crm_client_cleanup();
    crm_peer_destroy();

    /* Stop every timer before freeing the timer objects.
     * NOTE(review): fsa_election was set to NULL above, so
     * election_timeout_stop() receives NULL here - confirm it tolerates that.
     */
    crm_timer_stop(transition_timer);
    crm_timer_stop(integration_timer);
    crm_timer_stop(finalization_timer);
    crm_timer_stop(election_trigger);
    election_timeout_stop(fsa_election);
    crm_timer_stop(shutdown_escalation_timer);
    crm_timer_stop(wait_timer);
    crm_timer_stop(recheck_timer);

    free(transition_timer); transition_timer = NULL;
    free(integration_timer); integration_timer = NULL;
    free(finalization_timer); finalization_timer = NULL;
    free(election_trigger); election_trigger = NULL;
    free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
    free(wait_timer); wait_timer = NULL;
    free(recheck_timer); recheck_timer = NULL;

    /* Identity strings */
    free(fsa_our_dc_version); fsa_our_dc_version = NULL;
    free(fsa_our_uname); fsa_our_uname = NULL;
    free(fsa_our_uuid); fsa_our_uuid = NULL;
    free(fsa_our_dc); fsa_our_dc = NULL;

    free(fsa_cluster_name); fsa_cluster_name = NULL;

    free(te_uuid); te_uuid = NULL;
    free(te_client_id); te_client_id = NULL;
    free(fsa_pe_ref); fsa_pe_ref = NULL;
    free(failed_stop_offset); failed_stop_offset = NULL;
    free(failed_start_offset); failed_start_offset = NULL;

    free(max_generation_from); max_generation_from = NULL;
    free_xml(max_generation_xml); max_generation_xml = NULL;

    /* SIGCHLD is handled separately below, after crmd_mainloop is cleared */
    mainloop_destroy_signal(SIGPIPE);
    mainloop_destroy_signal(SIGUSR1);
    mainloop_destroy_signal(SIGTERM);
    mainloop_destroy_signal(SIGTRAP);

    if (mloop) {
        GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);

        /* mloop keeps the only remaining reference from here on */
        crmd_mainloop = NULL;

        mainloop_destroy_signal(SIGCHLD);

        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));

        {
            int lpc = 0;

            /* Dispatch pending sources, but at most 10 rounds */
            while((g_main_context_pending(ctx) && lpc < 10)) {
                lpc++;
                crm_trace("Iteration %d", lpc);
                g_main_context_dispatch(ctx);
            }
        }

        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
        g_main_loop_quit(mloop);

#if SUPPORT_HEARTBEAT
        /* Release the heartbeat API object, if one was obtained */
        if (fsa_cluster_conn) {
            crm_trace("Deleting heartbeat api object");
            fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn);
            fsa_cluster_conn = NULL;
        }
#endif

        g_main_loop_unref(mloop);

        crm_trace("Done %d", rc);
    } else {
        mainloop_destroy_signal(SIGCHLD);
    }

    cib_delete(fsa_cib_conn);
    fsa_cib_conn = NULL;

    throttle_fini();

    return rc;
}
499
500
501 void
502 do_exit(long long action,
503 enum crmd_fsa_cause cause,
504 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
505 {
506 int exit_code = pcmk_ok;
507 int log_level = LOG_INFO;
508 const char *exit_type = "gracefully";
509
510 if (action & A_EXIT_1) {
511
512 log_level = LOG_ERR;
513 exit_type = "forcefully";
514 exit_code = pcmk_err_generic;
515 }
516
517 verify_stopped(cur_state, LOG_ERR);
518 do_crm_log(log_level, "Performing %s - %s exiting the CRMd",
519 fsa_action2string(action), exit_type);
520
521 crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
522 crmd_exit(exit_code);
523 }
524
/* No-op signal handler; do_startup() installs it for SIGPIPE. */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
526
527
528 void
529 do_startup(long long action,
530 enum crmd_fsa_cause cause,
531 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
532 {
533 int was_error = 0;
534
535 crm_debug("Registering Signal Handlers");
536 mainloop_add_signal(SIGTERM, crm_shutdown);
537 mainloop_add_signal(SIGPIPE, sigpipe_ignore);
538
539 fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
540 config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
541 transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);
542
543 crm_debug("Creating CIB and LRM objects");
544 fsa_cib_conn = cib_new();
545
546 lrm_state_init_local();
547
548
549 transition_timer = calloc(1, sizeof(fsa_timer_t));
550 integration_timer = calloc(1, sizeof(fsa_timer_t));
551 finalization_timer = calloc(1, sizeof(fsa_timer_t));
552 election_trigger = calloc(1, sizeof(fsa_timer_t));
553 shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
554 wait_timer = calloc(1, sizeof(fsa_timer_t));
555 recheck_timer = calloc(1, sizeof(fsa_timer_t));
556
557 if (election_trigger != NULL) {
558 election_trigger->source_id = 0;
559 election_trigger->period_ms = -1;
560 election_trigger->fsa_input = I_DC_TIMEOUT;
561 election_trigger->callback = crm_timer_popped;
562 election_trigger->repeat = FALSE;
563 } else {
564 was_error = TRUE;
565 }
566
567 if (transition_timer != NULL) {
568 transition_timer->source_id = 0;
569 transition_timer->period_ms = -1;
570 transition_timer->fsa_input = I_PE_CALC;
571 transition_timer->callback = crm_timer_popped;
572 transition_timer->repeat = FALSE;
573 } else {
574 was_error = TRUE;
575 }
576
577 if (integration_timer != NULL) {
578 integration_timer->source_id = 0;
579 integration_timer->period_ms = -1;
580 integration_timer->fsa_input = I_INTEGRATED;
581 integration_timer->callback = crm_timer_popped;
582 integration_timer->repeat = FALSE;
583 } else {
584 was_error = TRUE;
585 }
586
587 if (finalization_timer != NULL) {
588 finalization_timer->source_id = 0;
589 finalization_timer->period_ms = -1;
590 finalization_timer->fsa_input = I_FINALIZED;
591 finalization_timer->callback = crm_timer_popped;
592 finalization_timer->repeat = FALSE;
593
594
595
596
597
598
599
600
601
602
603
604
605 finalization_timer->fsa_input = I_ELECTION;
606
607 } else {
608 was_error = TRUE;
609 }
610
611 if (shutdown_escalation_timer != NULL) {
612 shutdown_escalation_timer->source_id = 0;
613 shutdown_escalation_timer->period_ms = -1;
614 shutdown_escalation_timer->fsa_input = I_STOP;
615 shutdown_escalation_timer->callback = crm_timer_popped;
616 shutdown_escalation_timer->repeat = FALSE;
617 } else {
618 was_error = TRUE;
619 }
620
621 if (wait_timer != NULL) {
622 wait_timer->source_id = 0;
623 wait_timer->period_ms = 2000;
624 wait_timer->fsa_input = I_NULL;
625 wait_timer->callback = crm_timer_popped;
626 wait_timer->repeat = FALSE;
627 } else {
628 was_error = TRUE;
629 }
630
631 if (recheck_timer != NULL) {
632 recheck_timer->source_id = 0;
633 recheck_timer->period_ms = -1;
634 recheck_timer->fsa_input = I_PE_CALC;
635 recheck_timer->callback = crm_timer_popped;
636 recheck_timer->repeat = FALSE;
637 } else {
638 was_error = TRUE;
639 }
640
641
642 cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s));
643 te_subsystem = calloc(1, sizeof(struct crm_subsystem_s));
644 pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s));
645
646 if (cib_subsystem != NULL) {
647 cib_subsystem->pid = -1;
648 cib_subsystem->name = CRM_SYSTEM_CIB;
649 cib_subsystem->flag_connected = R_CIB_CONNECTED;
650 cib_subsystem->flag_required = R_CIB_REQUIRED;
651
652 } else {
653 was_error = TRUE;
654 }
655
656 if (te_subsystem != NULL) {
657 te_subsystem->pid = -1;
658 te_subsystem->name = CRM_SYSTEM_TENGINE;
659 te_subsystem->flag_connected = R_TE_CONNECTED;
660 te_subsystem->flag_required = R_TE_REQUIRED;
661
662 } else {
663 was_error = TRUE;
664 }
665
666 if (pe_subsystem != NULL) {
667 pe_subsystem->pid = -1;
668 pe_subsystem->path = CRM_DAEMON_DIR;
669 pe_subsystem->name = CRM_SYSTEM_PENGINE;
670 pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE;
671 pe_subsystem->args = NULL;
672 pe_subsystem->flag_connected = R_PE_CONNECTED;
673 pe_subsystem->flag_required = R_PE_REQUIRED;
674
675 } else {
676 was_error = TRUE;
677 }
678
679 if (was_error == FALSE && need_spawn_pengine_from_crmd()) {
680 if (start_subsystem(pe_subsystem) == FALSE) {
681 was_error = TRUE;
682 }
683 }
684
685 if (was_error) {
686 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
687 }
688
689 }
690
691 static int32_t
692 crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
693 {
694 crm_trace("Connection %p", c);
695 if (crm_client_new(c, uid, gid) == NULL) {
696 return -EIO;
697 }
698 return 0;
699 }
700
701 static void
702 crmd_ipc_created(qb_ipcs_connection_t * c)
703 {
704 crm_trace("Connection %p", c);
705 }
706
707 static int32_t
708 crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
709 {
710 uint32_t id = 0;
711 uint32_t flags = 0;
712 crm_client_t *client = crm_client_get(c);
713
714 xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags);
715
716 crm_trace("Invoked: %s", crm_client_name(client));
717 crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__);
718
719 if (msg == NULL) {
720 return 0;
721 }
722
723 #if ENABLE_ACL
724 CRM_ASSERT(client->user != NULL);
725 crm_acl_get_set_user(msg, F_CRM_USER, client->user);
726 #endif
727
728 crm_trace("Processing msg from %s", crm_client_name(client));
729 crm_log_xml_trace(msg, "CRMd[inbound]");
730
731 crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
732 if (crmd_authorize_message(msg, client, NULL)) {
733 route_message(C_IPC_MESSAGE, msg);
734 }
735
736 trigger_fsa(fsa_source);
737 free_xml(msg);
738 return 0;
739 }
740
741 static int32_t
742 crmd_ipc_closed(qb_ipcs_connection_t * c)
743 {
744 crm_client_t *client = crm_client_get(c);
745 struct crm_subsystem_s *the_subsystem = NULL;
746
747 if (client == NULL) {
748 return 0;
749 }
750
751 crm_trace("Connection %p", c);
752
753 if (client->userdata == NULL) {
754 crm_trace("Client hadn't registered with us yet");
755
756 } else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) {
757 the_subsystem = pe_subsystem;
758
759 } else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 0) {
760 the_subsystem = te_subsystem;
761
762 } else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) {
763 the_subsystem = cib_subsystem;
764 }
765
766 if (the_subsystem != NULL) {
767 the_subsystem->source = NULL;
768 the_subsystem->client = NULL;
769 crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid);
770
771 } else {
772
773 crm_trace("Received HUP from transient client");
774 }
775
776 crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client);
777 free(client->userdata);
778 crm_client_destroy(client);
779
780 trigger_fsa(fsa_source);
781 return 0;
782 }
783
784 static void
785 crmd_ipc_destroy(qb_ipcs_connection_t * c)
786 {
787 crm_trace("Connection %p", c);
788 crmd_ipc_closed(c);
789 }
790
791
792 void
793 do_stop(long long action,
794 enum crmd_fsa_cause cause,
795 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
796 {
797 crm_trace("Closing IPC server");
798 mainloop_del_ipc_server(ipcs); ipcs = NULL;
799 register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
800 }
801
802
803 void
804 do_started(long long action,
805 enum crmd_fsa_cause cause,
806 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
807 {
808 static struct qb_ipcs_service_handlers crmd_callbacks = {
809 .connection_accept = crmd_ipc_accept,
810 .connection_created = crmd_ipc_created,
811 .msg_process = crmd_ipc_dispatch,
812 .connection_closed = crmd_ipc_closed,
813 .connection_destroyed = crmd_ipc_destroy
814 };
815
816 if (cur_state != S_STARTING) {
817 crm_err("Start cancelled... %s", fsa_state2string(cur_state));
818 return;
819
820 } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) {
821 crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
822
823 crmd_fsa_stall(TRUE);
824 return;
825
826 } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) {
827 crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED);
828
829 crmd_fsa_stall(TRUE);
830 return;
831
832 } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
833 crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
834
835 crmd_fsa_stall(TRUE);
836 return;
837
838 } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) {
839 crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
840
841 crmd_fsa_stall(TRUE);
842 return;
843
844 } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) {
845
846
847 crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
848
849 #if SUPPORT_HEARTBEAT
850 if (is_heartbeat_cluster()) {
851 HA_Message *msg = NULL;
852
853 crm_trace("Looking for a HA message");
854 msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0);
855 if (msg != NULL) {
856 crm_trace("There was a HA message");
857 ha_msg_del(msg);
858 }
859 }
860 #endif
861 crmd_fsa_stall(TRUE);
862 return;
863 }
864
865 crm_debug("Init server comms");
866 ipcs = crmd_ipc_server_init(&crmd_callbacks);
867 if (ipcs == NULL) {
868 crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
869 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
870 }
871
872 if (stonith_reconnect == NULL) {
873 int dummy;
874
875 stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
876 }
877 set_bit(fsa_input_register, R_ST_REQUIRED);
878 mainloop_set_trigger(stonith_reconnect);
879
880 crm_notice("The local CRM is operational");
881 clear_bit(fsa_input_register, R_STARTING);
882 register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
883 }
884
885
886 void
887 do_recover(long long action,
888 enum crmd_fsa_cause cause,
889 enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
890 {
891 set_bit(fsa_input_register, R_IN_RECOVERY);
892 crm_warn("Fast-tracking shutdown in response to errors");
893
894 register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
895 }
896
897
898 pe_cluster_option crmd_opts[] = {
899
900 { "dc-version", NULL, "string", NULL, "none", NULL,
901 "Version of Pacemaker on the cluster's DC.",
902 "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes."
903 },
904 { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL,
905 "The messaging stack on which Pacemaker is currently running.",
906 "Used for informational and diagnostic purposes." },
907 { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time,
908 "How long to wait for a response from other nodes during startup.",
909 "The \"correct\" value will depend on the speed/load of your network and the type of switches used."
910 },
911 { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time",
912 "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)",
913 "15min", &check_timer,
914 "Polling interval for time based changes to options, resource parameters and constraints.",
915 "The Cluster is primarily event driven, however the configuration can have elements that change based on time."
916 " To ensure these changes take effect, we can optionally poll the cluster's status for changes."
917 },
918
919 #ifdef RHEL7_COMPAT
920
921
922
923
924
925 { "notification-agent", NULL, "string", NULL, "/dev/null", &check_script,
926 "Deprecated",
927 "Use alert path in alerts section instead"
928 },
929 { "notification-recipient", NULL, "string", NULL, "", NULL,
930 "Deprecated",
931 "Use recipient value in alerts section instead"
932 },
933 #endif
934
935 { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization,
936 "The maximum amount of system resources that should be used by nodes in the cluster",
937 "The cluster will slow down its recovery process when the amount of system resources used"
938 " (currently CPU) approaches this limit",
939 },
940 { "node-action-limit", NULL, "integer", NULL, "0", &check_number,
941 "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"},
942 { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer,
943 "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
944 },
945 { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer,
946 "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
947 },
948 { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer,
949 "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug."
950 },
951 { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer,
952 "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
953 },
954 { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer,
955 "*** Advanced Use Only ***\n"
956 "Enabling this option will slow down cluster recovery under all conditions",
957 "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n"
958 "Useful if your configuration is sensitive to the order in which ping updates arrive."
959 },
960 { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout,
961 "How long to wait before we can assume nodes are safely down", NULL
962 },
963 { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
964 "How many times stonith can fail before it will no longer be attempted on a target"
965 },
966 { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
967
968 #if SUPPORT_PLUGIN
969 { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." },
970 #endif
971 };
972
973
974 void
975 crmd_metadata(void)
976 {
977 config_metadata("CRM Daemon", "1.0",
978 "CRM Daemon Options",
979 "This is a fake resource that details the options that can be configured for the CRM Daemon.",
980 crmd_opts, DIMOF(crmd_opts));
981 }
982
983 static void
984 verify_crmd_options(GHashTable * options)
985 {
986 verify_all_options(options, crmd_opts, DIMOF(crmd_opts));
987 }
988
989 static const char *
990 crmd_pref(GHashTable * options, const char *name)
991 {
992 return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name);
993 }
994
/*
 * CIB callback registered by crm_read_options(): apply the cluster options
 * returned by the configuration query.
 *
 * msg:       not referenced
 * call_id:   CIB call identifier, used for logging only
 * rc:        query result; anything other than pcmk_ok aborts processing
 * output:    query result XML (the crm_config section, or a parent of it)
 * user_data: not referenced
 *
 * On success the timers, throttling limits, fencing limits, cluster name and
 * alerts are updated, R_READ_CONFIG is set and the FSA is triggered.
 */
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    const char *value = NULL;
    GHashTable *config_hash = NULL;
    crm_time_t *now = crm_time_new(NULL);
    xmlNode *crmconfig = NULL;
    xmlNode *alerts = NULL;

    if (rc != pcmk_ok) {
        /* NOTE(review): msg_data looks unused but is presumably referenced
         * by the register_fsa_error() macro - do not remove without checking.
         */
        fsa_data_t *msg_data = NULL;

        crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);

        if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
            /* R_STAYDOWN makes crmd_fast_exit() use DAEMON_RESPAWN_STOP */
            crm_err("The cluster is mis-configured - shutting down and staying down");
            set_bit(fsa_input_register, R_STAYDOWN);
        }
        goto bail;
    }

    /* The result may be the crm_config element itself or a parent of it */
    crmconfig = output;
    if ((crmconfig) &&
        (crm_element_name(crmconfig)) &&
        (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
        crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
    }
    if (!crmconfig) {
        /* See the note on msg_data above */
        fsa_data_t *msg_data = NULL;

        crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        goto bail;
    }

    crm_debug("Call %d : Parsing CIB options", call_id);
    config_hash = crm_str_table_new();
    unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash,
                               CIB_OPTIONS_FIRST, FALSE, now);

    verify_crmd_options(config_hash);

#ifdef RHEL7_COMPAT
    {
        const char *script = crmd_pref(config_hash, "notification-agent");
        const char *recip  = crmd_pref(config_hash, "notification-recipient");

        pe_enable_legacy_alerts(script, recip);
    }
#endif

    /* From here on: read each option and push it to its consumer */
    value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
    election_trigger->period_ms = crm_get_msec(value);

    value = crmd_pref(config_hash, "node-action-limit");
    throttle_update_job_max(value);

    value = crmd_pref(config_hash, "load-threshold");
    if(value) {
        /* The parsed number is divided by 100 before being handed on */
        throttle_set_load_target(strtof(value, NULL) / 100.0);
    }

    value = crmd_pref(config_hash, "no-quorum-policy");
    if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) {
        /* Only ever switched on here; nothing in this file resets it */
        no_quorum_suicide_escalation = TRUE;
    }

    value = crmd_pref(config_hash,"stonith-max-attempts");
    update_stonith_max_attempts(value);

    value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
    shutdown_escalation_timer->period_ms = crm_get_msec(value);

    crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms);

    value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
    election_timeout_set_period(fsa_election, crm_get_msec(value));

    value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK);
    recheck_timer->period_ms = crm_get_msec(value);
    crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms);

    value = crmd_pref(config_hash, "crmd-transition-delay");
    transition_timer->period_ms = crm_get_msec(value);

    value = crmd_pref(config_hash, "crmd-integration-timeout");
    integration_timer->period_ms = crm_get_msec(value);

    value = crmd_pref(config_hash, "crmd-finalization-timeout");
    finalization_timer->period_ms = crm_get_msec(value);

#if SUPPORT_COROSYNC
    if (is_classic_ais_cluster()) {
        /* NOTE(review): the expected-votes entry in crmd_opts exists only
         * under SUPPORT_PLUGIN; verify value cannot be NULL here otherwise.
         */
        value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES);
        crm_debug("Sending expected-votes=%s to corosync", value);
        send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais);
    }
#endif

    /* Replace any previously stored cluster name */
    free(fsa_cluster_name);
    fsa_cluster_name = NULL;

    /* Looked up directly in the hash: "cluster-name" is not in crmd_opts */
    value = g_hash_table_lookup(config_hash, "cluster-name");
    if (value) {
        fsa_cluster_name = strdup(value);
    }

    /* Alerts are searched for under the original query output */
    alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
    crmd_unpack_alerts(alerts);

    set_bit(fsa_input_register, R_READ_CONFIG);
    crm_trace("Triggering FSA: %s", __FUNCTION__);
    mainloop_set_trigger(fsa_source);

    g_hash_table_destroy(config_hash);
  bail:
    /* Both error paths jump here before config_hash has been created */
    crm_time_free(now);
}
1114
1115 gboolean
1116 crm_read_options(gpointer user_data)
1117 {
1118 int call_id =
1119 fsa_cib_conn->cmds->query(fsa_cib_conn,
1120 "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
1121 NULL, cib_xpath | cib_scope_local);
1122
1123 fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
1124 crm_trace("Querying the CIB... call %d", call_id);
1125 return TRUE;
1126 }
1127
1128
1129 void
1130 do_read_config(long long action,
1131 enum crmd_fsa_cause cause,
1132 enum crmd_fsa_state cur_state,
1133 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1134 {
1135 throttle_init();
1136 mainloop_set_trigger(config_read);
1137 }
1138
1139 void
1140 crm_shutdown(int nsig)
1141 {
1142 if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) {
1143 if (is_set(fsa_input_register, R_SHUTDOWN)) {
1144 crm_err("Escalating the shutdown");
1145 register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
1146
1147 } else {
1148 set_bit(fsa_input_register, R_SHUTDOWN);
1149 register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
1150
1151 if (shutdown_escalation_timer->period_ms < 1) {
1152 const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
1153 int msec = crm_get_msec(value);
1154
1155 crm_debug("Using default shutdown escalation: %dms", msec);
1156 shutdown_escalation_timer->period_ms = msec;
1157 }
1158
1159
1160 crm_notice("Shutting down cluster resource manager " CRM_XS
1161 " limit=%dms", shutdown_escalation_timer->period_ms);
1162 crm_timer_start(shutdown_escalation_timer);
1163 }
1164
1165 } else {
1166 crm_info("exit from shutdown");
1167 crmd_exit(pcmk_ok);
1168 }
1169 }