This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- send_stonith_update
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- te_connect_stonith
- controld_trigger_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/msg_xml.h>
13 #include <crm/common/xml.h>
14 #include <crm/stonith-ng.h>
15 #include <crm/fencing/internal.h>
16
17 #include <pacemaker-controld.h>
18
/* Forward declaration: te_connect_stonith() registers this notification
 * handler before its definition appears later in the file.
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
21
22
23
24
25
26
27
28
29
/* Per-target fencing failure record; stored as the value type of the
 * stonith_failures table below.
 */
struct st_fail_rec {
    int count;  /* failures counted since the last reset for this target */
};

/* Whether to panic (rather than exit) when notified that this node was
 * fenced; set by set_fence_reaction()
 */
static bool fence_reaction_panic = FALSE;

/* Failure count at which too_many_st_failures() reports "too many"; set by
 * update_stonith_max_attempts()
 */
static unsigned long int stonith_max_attempts = 10;

/* Target name -> struct st_fail_rec; created lazily by
 * st_fail_count_increment()
 */
static GHashTable *stonith_failures = NULL;
37
38 void
39 update_stonith_max_attempts(const char *value)
40 {
41 stonith_max_attempts = char2score(value);
42 if (stonith_max_attempts < 1UL) {
43 stonith_max_attempts = 10UL;
44 }
45 }
46
47 void
48 set_fence_reaction(const char *reaction_s)
49 {
50 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
51 fence_reaction_panic = TRUE;
52
53 } else {
54 if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
55 crm_warn("Invalid value '%s' for %s, using 'stop'",
56 reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
57 }
58 fence_reaction_panic = FALSE;
59 }
60 }
61
62 static gboolean
63 too_many_st_failures(const char *target)
64 {
65 GHashTableIter iter;
66 const char *key = NULL;
67 struct st_fail_rec *value = NULL;
68
69 if (stonith_failures == NULL) {
70 return FALSE;
71 }
72
73 if (target == NULL) {
74 g_hash_table_iter_init(&iter, stonith_failures);
75 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
76 (gpointer *) &value)) {
77
78 if (value->count >= stonith_max_attempts) {
79 target = (const char*)key;
80 goto too_many;
81 }
82 }
83 } else {
84 value = g_hash_table_lookup(stonith_failures, target);
85 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
86 goto too_many;
87 }
88 }
89 return FALSE;
90
91 too_many:
92 crm_warn("Too many failures (%d) to fence %s, giving up",
93 value->count, target);
94 return TRUE;
95 }
96
97
98
99
100
101
102
103 void
104 st_fail_count_reset(const char *target)
105 {
106 if (stonith_failures == NULL) {
107 return;
108 }
109
110 if (target) {
111 struct st_fail_rec *rec = NULL;
112
113 rec = g_hash_table_lookup(stonith_failures, target);
114 if (rec) {
115 rec->count = 0;
116 }
117 } else {
118 GHashTableIter iter;
119 const char *key = NULL;
120 struct st_fail_rec *rec = NULL;
121
122 g_hash_table_iter_init(&iter, stonith_failures);
123 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
124 (gpointer *) &rec)) {
125 rec->count = 0;
126 }
127 }
128 }
129
130 static void
131 st_fail_count_increment(const char *target)
132 {
133 struct st_fail_rec *rec = NULL;
134
135 if (stonith_failures == NULL) {
136 stonith_failures = pcmk__strkey_table(free, free);
137 }
138
139 rec = g_hash_table_lookup(stonith_failures, target);
140 if (rec) {
141 rec->count++;
142 } else {
143 rec = malloc(sizeof(struct st_fail_rec));
144 if(rec == NULL) {
145 return;
146 }
147
148 rec->count = 1;
149 g_hash_table_insert(stonith_failures, strdup(target), rec);
150 }
151 }
152
153
154
155
156 static void
157 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
158 void *user_data)
159 {
160 if (rc < pcmk_ok) {
161 crm_err("Fencing update %d for %s: failed - %s (%d)",
162 call_id, (char *)user_data, pcmk_strerror(rc), rc);
163 crm_log_xml_warn(msg, "Failed update");
164 abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
165 NULL);
166
167 } else {
168 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
169 }
170 }
171
172 static void
173 send_stonith_update(pcmk__graph_action_t *action, const char *target,
174 const char *uuid)
175 {
176 int rc = pcmk_ok;
177 crm_node_t *peer = NULL;
178
179
180
181
182
183 int flags = node_update_join | node_update_expected;
184
185
186 xmlNode *node_state = NULL;
187
188 CRM_CHECK(target != NULL, return);
189 CRM_CHECK(uuid != NULL, return);
190
191
192 peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
193
194 CRM_CHECK(peer != NULL, return);
195
196 if (peer->state == NULL) {
197
198
199
200
201 flags |= node_update_cluster;
202 }
203
204 if (peer->uuid == NULL) {
205 crm_info("Recording uuid '%s' for node '%s'", uuid, target);
206 peer->uuid = strdup(uuid);
207 }
208
209 crmd_peer_down(peer, TRUE);
210
211
212 node_state = create_node_state_update(peer, flags, NULL, __func__);
213
214
215 if (peer->flags & crm_remote_node) {
216 char *now_s = pcmk__ttoa(time(NULL));
217
218 crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
219 free(now_s);
220 }
221
222
223 crm_xml_add(node_state, XML_ATTR_UUID, uuid);
224
225 rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
226 cib_quorum_override | cib_scope_local | cib_can_create);
227
228
229 crm_debug("Sending fencing update %d for %s", rc, target);
230 fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
231
232
233
234
235 controld_delete_node_state(peer->uname, controld_section_all,
236 cib_scope_local);
237 free_xml(node_state);
238 return;
239 }
240
241
242
243
244
245
246
247
248
249 static void
250 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
251 const char *target, const xmlNode *reason)
252 {
253
254
255
256 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
257 abort_action = pcmk__graph_wait;
258 }
259 abort_transition(INFINITY, abort_action, "Stonith failed", reason);
260 }
261
262
263
264
265
266
267
268
269
270
/* Names (heap-allocated strings) of fencing targets awaiting a CIB update;
 * entries are added by add_stonith_cleanup() and consumed by
 * execute_stonith_cleanup()
 */
static GList *stonith_cleanup_list = NULL;
272
273
274
275
276
277
278
279 void
280 add_stonith_cleanup(const char *target) {
281 stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
282 }
283
284
285
286
287
288
289
290 void
291 remove_stonith_cleanup(const char *target)
292 {
293 GList *iter = stonith_cleanup_list;
294
295 while (iter != NULL) {
296 GList *tmp = iter;
297 char *iter_name = tmp->data;
298
299 iter = iter->next;
300 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
301 crm_trace("Removing %s from the cleanup list", iter_name);
302 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
303 free(iter_name);
304 }
305 }
306 }
307
308
309
310
311
312 void
313 purge_stonith_cleanup(void)
314 {
315 if (stonith_cleanup_list) {
316 GList *iter = NULL;
317
318 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
319 char *target = iter->data;
320
321 crm_info("Purging %s from stonith cleanup list", target);
322 free(target);
323 }
324 g_list_free(stonith_cleanup_list);
325 stonith_cleanup_list = NULL;
326 }
327 }
328
329
330
331
332
333 void
334 execute_stonith_cleanup(void)
335 {
336 GList *iter;
337
338 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
339 char *target = iter->data;
340 crm_node_t *target_node = crm_get_peer(0, target);
341 const char *uuid = crm_peer_uuid(target_node);
342
343 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
344 send_stonith_update(NULL, target, uuid);
345 free(target);
346 }
347 g_list_free(stonith_cleanup_list);
348 stonith_cleanup_list = NULL;
349 }
350
351
352
353
354
355
356
357
358
/* Fencer API object; allocated on demand by te_connect_stonith() and freed by
 * controld_disconnect_fencer(true)
 */
static stonith_t *stonith_api = NULL;

/* Mainloop trigger that runs te_connect_stonith() to (re)connect */
static crm_trigger_t *stonith_reconnect = NULL;

/* "<system name>.<pid>" string, built on first fence notification and
 * compared there against the client that requested a fencing operation
 */
static char *te_client_id = NULL;
362
363 static gboolean
364 fail_incompletable_stonith(pcmk__graph_t *graph)
365 {
366 GList *lpc = NULL;
367 const char *task = NULL;
368 xmlNode *last_action = NULL;
369
370 if (graph == NULL) {
371 return FALSE;
372 }
373
374 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
375 GList *lpc2 = NULL;
376 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
377
378 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
379 continue;
380 }
381
382 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
383 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
384
385 if ((action->type != pcmk__cluster_graph_action)
386 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
387 continue;
388 }
389
390 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
391 if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
392 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
393 last_action = action->xml;
394 pcmk__update_graph(graph, action);
395 crm_notice("Failing action %d (%s): fencer terminated",
396 action->id, ID(action->xml));
397 }
398 }
399 }
400
401 if (last_action != NULL) {
402 crm_warn("Fencer failure resulted in unrunnable actions");
403 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
404 return TRUE;
405 }
406
407 return FALSE;
408 }
409
410 static void
411 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
412 {
413 te_cleanup_stonith_history_sync(st, FALSE);
414
415 if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
416 crm_crit("Fencing daemon connection failed");
417 mainloop_set_trigger(stonith_reconnect);
418
419 } else {
420 crm_info("Fencing daemon disconnected");
421 }
422
423 if (stonith_api) {
424
425
426
427 if (stonith_api->state != stonith_disconnected) {
428 stonith_api->cmds->disconnect(st);
429 }
430 stonith_api->cmds->remove_notification(stonith_api, NULL);
431 }
432
433 if (AM_I_DC) {
434 fail_incompletable_stonith(transition_graph);
435 trigger_graph();
436 }
437 }
438
439
440
441
442
443
444
445
/*!
 * \brief Handle a notification from the fencer about a fencing result
 *
 * Registered in te_connect_stonith() as the handler for fence notifications.
 * Logs the result, reacts if this node itself was the target, tracks failure
 * counts on non-DC nodes, and for successful fencing of another node updates
 * the CIB and peer caches as appropriate.
 *
 * \param[in] st     Fencer connection (not used by this function)
 * \param[in] event  Fencing event that was reported
 */
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
    bool succeeded = true;
    const char *executioner = "the cluster";
    const char *client = "a client";
    const char *reason = NULL;
    int exec_status;

    /* Build our own client ID once; it is compared below against the client
     * that requested the fencing
     */
    if (te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
                                         (unsigned long) getpid());
    }

    if (event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    /* Prefer the names from the event over the generic placeholders */
    if (event->executioner != NULL) {
        executioner = event->executioner;
    }
    if (event->client_origin != NULL) {
        client = event->client_origin;
    }

    /* Success requires both an OK exit status and a "done" execution status.
     * If execution finished but the exit status was bad, report the execution
     * status as an error so log messages don't claim "done" for a failure.
     */
    exec_status = stonith__event_execution_status(event);
    if ((stonith__event_exit_status(event) != CRM_EX_OK)
        || (exec_status != PCMK_EXEC_DONE)) {
        succeeded = false;
        if (exec_status == PCMK_EXEC_DONE) {
            exec_status = PCMK_EXEC_ERROR;
        }
    }
    reason = stonith__event_exit_reason(event);

    crmd_alert_fencing_op(event);

    /* An "on" action is unfencing: log the outcome and do nothing else */
    if (pcmk__str_eq("on", event->action, pcmk__str_none)) {

        if (succeeded) {
            crm_notice("%s was unfenced by %s at the request of %s@%s",
                       event->target, executioner, client, event->origin);
        } else {
            crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
                    event->target, executioner,
                    pcmk_exec_status_str(exec_status),
                    ((reason == NULL)? "" : ": "),
                    ((reason == NULL)? "" : reason),
                    stonith__event_exit_status(event));
        }
        return;
    }

    if (succeeded
        && pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) {

        /* We are reported as successfully fenced, yet we are still running.
         * Depending on the configured fence reaction, either panic the local
         * host or exit with a fatal status.
         */
        crm_crit("We were allegedly just fenced by %s for %s!",
                 executioner, event->origin);
        if (fence_reaction_panic) {
            pcmk__panic(__func__);
        } else {
            crm_exit(CRM_EX_FATAL);
        }
        return;
    }

    /* Only non-DC nodes update the failure counts here; on the DC,
     * tengine_stonith_callback() updates them for operations it initiated
     */
    if (!AM_I_DC) {
        if (succeeded) {
            st_fail_count_reset(event->target);
        } else {
            st_fail_count_increment(event->target);
        }
    }

    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
               "%s%s%s%s " CRM_XS " event=%s",
               event->target, (succeeded? "" : " not"),
               event->action, executioner, client, event->origin,
               (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
               ((reason == NULL)? "" : " ("),
               ((reason == NULL)? "" : reason),
               ((reason == NULL)? "" : ")"),
               event->id);

    if (succeeded) {
        crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
                                                         CRM_GET_PEER_ANY);
        const char *uuid = NULL;

        /* Nothing more to do for a target we have no record of */
        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        if (AM_I_DC) {
            /* The DC always records the fencing in the CIB */
            send_stonith_update(NULL, event->target, uuid);

            /* If the request came from some client other than ourselves,
             * abort the transition so it gets recalculated
             */
            if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {

                crm_info("External fencing operation from %s fenced %s",
                         client, event->target);
                abort_transition(INFINITY, pcmk__graph_restart,
                                 "External Fencing Operation", NULL);
            }

        /* We are not the DC, and the target is (or, if we don't currently
         * know a DC, may have been) our DC
         */
        } else if (pcmk__str_eq(fsa_our_dc, event->target,
                                pcmk__str_null_matches|pcmk__str_casei)
                   && !pcmk_is_set(peer->flags, crm_remote_node)) {

            crm_notice("Fencing target %s %s our leader",
                       event->target, (fsa_our_dc? "was" : "may have been"));

            /* If we executed the fencing ourselves, send the update now;
             * either way, remember the target in the cleanup list
             */
            if (pcmk__str_eq(event->executioner, fsa_our_uname,
                             pcmk__str_casei)) {
                send_stonith_update(NULL, event->target, uuid);
            }
            add_stonith_cleanup(event->target);
        }

        /* A fenced remote node's connection resource is failed as well */
        if (pcmk_is_set(peer->flags, crm_remote_node)) {
            remote_ra_fail(event->target);
        }

        crmd_peer_down(peer, TRUE);
    }
}
607
608
609
610
611
612
613
614
615
616
617 static gboolean
618 te_connect_stonith(gpointer user_data)
619 {
620 int rc = pcmk_ok;
621
622 if (stonith_api == NULL) {
623 stonith_api = stonith_api_new();
624 if (stonith_api == NULL) {
625 crm_err("Could not connect to fencer: API memory allocation failed");
626 return TRUE;
627 }
628 }
629
630 if (stonith_api->state != stonith_disconnected) {
631 crm_trace("Already connected to fencer, no need to retry");
632 return TRUE;
633 }
634
635 if (user_data == NULL) {
636
637 rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
638 if (rc != pcmk_ok) {
639 crm_err("Could not connect to fencer in 30 attempts: %s "
640 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
641 }
642 } else {
643
644 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
645 if (rc != pcmk_ok) {
646 if (pcmk_is_set(fsa_input_register, R_ST_REQUIRED)) {
647 crm_notice("Fencer connection failed (will retry): %s "
648 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
649 mainloop_set_trigger(stonith_reconnect);
650 } else {
651 crm_info("Fencer connection failed (ignoring because no longer required): %s "
652 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
653 }
654 return TRUE;
655 }
656 }
657
658 if (rc == pcmk_ok) {
659 stonith_api->cmds->register_notification(stonith_api,
660 T_STONITH_NOTIFY_DISCONNECT,
661 tengine_stonith_connection_destroy);
662 stonith_api->cmds->register_notification(stonith_api,
663 T_STONITH_NOTIFY_FENCE,
664 handle_fence_notification);
665 stonith_api->cmds->register_notification(stonith_api,
666 T_STONITH_NOTIFY_HISTORY_SYNCED,
667 tengine_stonith_history_synced);
668 te_trigger_stonith_history_sync(TRUE);
669 crm_notice("Fencer successfully connected");
670 }
671
672 return TRUE;
673 }
674
675
676
677
678
679 void
680 controld_trigger_fencer_connect(void)
681 {
682 if (stonith_reconnect == NULL) {
683 stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
684 te_connect_stonith,
685 GINT_TO_POINTER(TRUE));
686 }
687 controld_set_fsa_input_flags(R_ST_REQUIRED);
688 mainloop_set_trigger(stonith_reconnect);
689 }
690
691 void
692 controld_disconnect_fencer(bool destroy)
693 {
694 if (stonith_api) {
695
696 controld_clear_fsa_input_flags(R_ST_REQUIRED);
697
698 if (stonith_api->state != stonith_disconnected) {
699 stonith_api->cmds->disconnect(stonith_api);
700 }
701 stonith_api->cmds->remove_notification(stonith_api, NULL);
702 }
703 if (destroy) {
704 if (stonith_api) {
705 stonith_api->cmds->free(stonith_api);
706 stonith_api = NULL;
707 }
708 if (stonith_reconnect) {
709 mainloop_destroy_trigger(stonith_reconnect);
710 stonith_reconnect = NULL;
711 }
712 if (te_client_id) {
713 free(te_client_id);
714 te_client_id = NULL;
715 }
716 }
717 }
718
719 static gboolean
720 do_stonith_history_sync(gpointer user_data)
721 {
722 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
723 stonith_history_t *history = NULL;
724
725 te_cleanup_stonith_history_sync(stonith_api, FALSE);
726 stonith_api->cmds->history(stonith_api,
727 st_opt_sync_call | st_opt_broadcast,
728 NULL, &history, 5);
729 stonith_history_free(history);
730 return TRUE;
731 } else {
732 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
733 return FALSE;
734 }
735 }
736
737 static void
738 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
739 {
740 char *uuid = NULL;
741 int stonith_id = -1;
742 int transition_id = -1;
743 pcmk__graph_action_t *action = NULL;
744 const char *target = NULL;
745
746 if ((data == NULL) || (data->userdata == NULL)) {
747 crm_err("Ignoring fence operation %d result: "
748 "No transition key given (bug?)",
749 ((data == NULL)? -1 : data->call_id));
750 return;
751 }
752
753 if (!AM_I_DC) {
754 const char *reason = stonith__exit_reason(data);
755
756 if (reason == NULL) {
757 reason = pcmk_exec_status_str(stonith__execution_status(data));
758 }
759 crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
760 data->call_id, stonith__exit_status(data), reason,
761 (const char *) data->userdata);
762 return;
763 }
764
765 CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
766 &stonith_id, NULL),
767 goto bail);
768
769 if (transition_graph->complete || (stonith_id < 0)
770 || !pcmk__str_eq(uuid, te_uuid, pcmk__str_none)
771 || (transition_graph->id != transition_id)) {
772 crm_info("Ignoring fence operation %d result: "
773 "Not from current transition " CRM_XS
774 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
775 data->call_id, pcmk__btoa(transition_graph->complete),
776 stonith_id, uuid, te_uuid, transition_id, transition_graph->id);
777 goto bail;
778 }
779
780 action = controld_get_action(stonith_id);
781 if (action == NULL) {
782 crm_err("Ignoring fence operation %d result: "
783 "Action %d not found in transition graph (bug?) "
784 CRM_XS " uuid=%s transition=%d",
785 data->call_id, stonith_id, uuid, transition_id);
786 goto bail;
787 }
788
789 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
790 if (target == NULL) {
791 crm_err("Ignoring fence operation %d result: No target given (bug?)",
792 data->call_id);
793 goto bail;
794 }
795
796 stop_te_timer(action);
797 if (stonith__exit_status(data) == CRM_EX_OK) {
798 const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
799 const char *op = crm_meta_value(action->params, "stonith_action");
800
801 crm_info("Fence operation %d for %s succeeded", data->call_id, target);
802 if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
803 te_action_confirmed(action, NULL);
804 if (pcmk__str_eq("on", op, pcmk__str_casei)) {
805 const char *value = NULL;
806 char *now = pcmk__ttoa(time(NULL));
807 gboolean is_remote_node = FALSE;
808
809
810
811
812
813
814
815
816 if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
817 is_remote_node = TRUE;
818 }
819
820 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
821 is_remote_node);
822 free(now);
823
824 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
825 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
826 is_remote_node);
827
828 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
829 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
830 is_remote_node);
831
832 } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
833 send_stonith_update(action, target, uuid);
834 pcmk__set_graph_action_flags(action,
835 pcmk__graph_action_sent_update);
836 }
837 }
838 st_fail_count_reset(target);
839
840 } else {
841 enum pcmk__graph_next abort_action = pcmk__graph_restart;
842 int status = stonith__execution_status(data);
843 const char *reason = stonith__exit_reason(data);
844
845 if (reason == NULL) {
846 if (status == PCMK_EXEC_DONE) {
847 reason = "Agent returned error";
848 } else {
849 reason = pcmk_exec_status_str(status);
850 }
851 }
852 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
853
854
855
856
857 if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
858 crm_warn("Fence operation %d for %s failed: %s "
859 "(aborting transition and giving up for now)",
860 data->call_id, target, reason);
861 abort_action = pcmk__graph_wait;
862 } else {
863 crm_notice("Fence operation %d for %s failed: %s "
864 "(aborting transition)", data->call_id, target, reason);
865 }
866
867
868
869
870
871 st_fail_count_increment(target);
872 abort_for_stonith_failure(abort_action, target, NULL);
873 }
874
875 pcmk__update_graph(transition_graph, action);
876 trigger_graph();
877
878 bail:
879 free(data->userdata);
880 free(uuid);
881 return;
882 }
883
884 static int
885 fence_with_delay(const char *target, const char *type, const char *delay)
886 {
887 uint32_t options = st_opt_none;
888 int timeout_sec = (int) (transition_graph->stonith_timeout / 1000);
889 int delay_i;
890
891 if (crmd_join_phase_count(crm_join_confirmed) == 1) {
892 stonith__set_call_options(options, target, st_opt_allow_suicide);
893 }
894 pcmk__scan_min_int(delay, &delay_i, 0);
895 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
896 type, timeout_sec, 0, delay_i);
897 }
898
899
900
901
902
903
904
905
906
907
908 int
909 controld_execute_fence_action(pcmk__graph_t *graph,
910 pcmk__graph_action_t *action)
911 {
912 int rc = 0;
913 const char *id = NULL;
914 const char *uuid = NULL;
915 const char *target = NULL;
916 const char *type = NULL;
917 char *transition_key = NULL;
918 const char *priority_delay = NULL;
919 gboolean invalid_action = FALSE;
920
921 id = ID(action->xml);
922 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
923 uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
924 type = crm_meta_value(action->params, "stonith_action");
925
926 CRM_CHECK(id != NULL, invalid_action = TRUE);
927 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
928 CRM_CHECK(type != NULL, invalid_action = TRUE);
929 CRM_CHECK(target != NULL, invalid_action = TRUE);
930
931 if (invalid_action) {
932 crm_log_xml_warn(action->xml, "BadAction");
933 return EPROTO;
934 }
935
936 priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
937
938 crm_notice("Requesting fencing (%s) of node %s "
939 CRM_XS " action=%s timeout=%u%s%s",
940 type, target, id, transition_graph->stonith_timeout,
941 priority_delay ? " priority_delay=" : "",
942 priority_delay ? priority_delay : "");
943
944
945 te_connect_stonith(NULL);
946
947 rc = fence_with_delay(target, type, priority_delay);
948 transition_key = pcmk__transition_key(transition_graph->id, action->id, 0,
949 te_uuid),
950 stonith_api->cmds->register_callback(stonith_api, rc,
951 (int) (transition_graph->stonith_timeout / 1000),
952 st_opt_timeout_updates, transition_key,
953 "tengine_stonith_callback", tengine_stonith_callback);
954 return pcmk_rc_ok;
955 }
956
957 bool
958 controld_verify_stonith_watchdog_timeout(const char *value)
959 {
960 gboolean rv = TRUE;
961
962 if (stonith_api && (stonith_api->state != stonith_disconnected) &&
963 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
964 fsa_our_uname)) {
965 rv = pcmk__valid_sbd_timeout(value);
966 }
967 return rv;
968 }
969
970
971
972
973
974
975
976
977
978
979
/* Trigger that runs do_stonith_history_sync(); created lazily by
 * te_trigger_stonith_history_sync()
 */
static crm_trigger_t *stonith_history_sync_trigger = NULL;

/* Timers (5000 and 30000, respectively, as passed to mainloop_timer_add())
 * that set the trigger above when they expire; created lazily by
 * te_trigger_stonith_history_sync()
 */
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
983
984 void
985 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
986 {
987 if (free_timers) {
988 mainloop_timer_del(stonith_history_sync_timer_short);
989 stonith_history_sync_timer_short = NULL;
990 mainloop_timer_del(stonith_history_sync_timer_long);
991 stonith_history_sync_timer_long = NULL;
992 } else {
993 mainloop_timer_stop(stonith_history_sync_timer_short);
994 mainloop_timer_stop(stonith_history_sync_timer_long);
995 }
996
997 if (st) {
998 st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
999 }
1000 }
1001
1002 static void
1003 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1004 {
1005 te_cleanup_stonith_history_sync(st, FALSE);
1006 crm_debug("Fence-history synced - cancel all timers");
1007 }
1008
1009 static gboolean
1010 stonith_history_sync_set_trigger(gpointer user_data)
1011 {
1012 mainloop_set_trigger(stonith_history_sync_trigger);
1013 return FALSE;
1014 }
1015
1016 void
1017 te_trigger_stonith_history_sync(bool long_timeout)
1018 {
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036 if (stonith_history_sync_trigger == NULL) {
1037 stonith_history_sync_trigger =
1038 mainloop_add_trigger(G_PRIORITY_LOW,
1039 do_stonith_history_sync, NULL);
1040 }
1041
1042 if (long_timeout) {
1043 if(stonith_history_sync_timer_long == NULL) {
1044 stonith_history_sync_timer_long =
1045 mainloop_timer_add("history_sync_long", 30000,
1046 FALSE, stonith_history_sync_set_trigger,
1047 NULL);
1048 }
1049 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1050 mainloop_timer_start(stonith_history_sync_timer_long);
1051 } else {
1052 if(stonith_history_sync_timer_short == NULL) {
1053 stonith_history_sync_timer_short =
1054 mainloop_timer_add("history_sync_short", 5000,
1055 FALSE, stonith_history_sync_set_trigger,
1056 NULL);
1057 }
1058 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1059 mainloop_timer_start(stonith_history_sync_timer_short);
1060 }
1061
1062 }
1063
1064