This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- controld_configure_fencing
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- send_stonith_update
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- te_connect_stonith
- controld_trigger_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/msg_xml.h>
13 #include <crm/common/xml.h>
14 #include <crm/stonith-ng.h>
15 #include <crm/fencing/internal.h>
16
17 #include <pacemaker-controld.h>
18
/* Forward declaration: the handler is defined near the end of this file but
 * is referenced earlier, when te_connect_stonith() registers it as a fencer
 * notification callback.
 */
19 static void
20 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
21
22
23
24
25
26
27
28
29
/* Failure record kept per fencing target (stored as a value in
 * stonith_failures)
 */
30 struct st_fail_rec {
31 int count; /* set to 1 on first failure, incremented after, zeroed on reset */
32 };
33
/* Whether set_fence_reaction() last selected "panic" (true) or "stop" (false);
 * consulted when this node is notified that it was itself fenced
 */
34 static bool fence_reaction_panic = false;
/* Failure count at which too_many_st_failures() gives up on a target */
35 static unsigned long int stonith_max_attempts = 10;
/* Maps target name to its struct st_fail_rec; created lazily by
 * st_fail_count_increment()
 */
36 static GHashTable *stonith_failures = NULL;
37
38
39
40
41
42
43
44 static void
45 update_stonith_max_attempts(const char *value)
46 {
47 stonith_max_attempts = char2score(value);
48 if (stonith_max_attempts < 1UL) {
49 stonith_max_attempts = 10UL;
50 }
51 }
52
53
54
55
56
57
58
59 static void
60 set_fence_reaction(const char *reaction_s)
61 {
62 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
63 fence_reaction_panic = true;
64
65 } else {
66 if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
67 crm_warn("Invalid value '%s' for %s, using 'stop'",
68 reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
69 }
70 fence_reaction_panic = false;
71 }
72 }
73
74
75
76
77
78
79
80 void
81 controld_configure_fencing(GHashTable *options)
82 {
83 const char *value = NULL;
84
85 value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION);
86 set_fence_reaction(value);
87
88 value = g_hash_table_lookup(options, "stonith-max-attempts");
89 update_stonith_max_attempts(value);
90 }
91
92 static gboolean
93 too_many_st_failures(const char *target)
94 {
95 GHashTableIter iter;
96 const char *key = NULL;
97 struct st_fail_rec *value = NULL;
98
99 if (stonith_failures == NULL) {
100 return FALSE;
101 }
102
103 if (target == NULL) {
104 g_hash_table_iter_init(&iter, stonith_failures);
105 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
106 (gpointer *) &value)) {
107
108 if (value->count >= stonith_max_attempts) {
109 target = (const char*)key;
110 goto too_many;
111 }
112 }
113 } else {
114 value = g_hash_table_lookup(stonith_failures, target);
115 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
116 goto too_many;
117 }
118 }
119 return FALSE;
120
121 too_many:
122 crm_warn("Too many failures (%d) to fence %s, giving up",
123 value->count, target);
124 return TRUE;
125 }
126
127
128
129
130
131
132
133 void
134 st_fail_count_reset(const char *target)
135 {
136 if (stonith_failures == NULL) {
137 return;
138 }
139
140 if (target) {
141 struct st_fail_rec *rec = NULL;
142
143 rec = g_hash_table_lookup(stonith_failures, target);
144 if (rec) {
145 rec->count = 0;
146 }
147 } else {
148 GHashTableIter iter;
149 const char *key = NULL;
150 struct st_fail_rec *rec = NULL;
151
152 g_hash_table_iter_init(&iter, stonith_failures);
153 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
154 (gpointer *) &rec)) {
155 rec->count = 0;
156 }
157 }
158 }
159
160 static void
161 st_fail_count_increment(const char *target)
162 {
163 struct st_fail_rec *rec = NULL;
164
165 if (stonith_failures == NULL) {
166 stonith_failures = pcmk__strkey_table(free, free);
167 }
168
169 rec = g_hash_table_lookup(stonith_failures, target);
170 if (rec) {
171 rec->count++;
172 } else {
173 rec = malloc(sizeof(struct st_fail_rec));
174 if(rec == NULL) {
175 return;
176 }
177
178 rec->count = 1;
179 g_hash_table_insert(stonith_failures, strdup(target), rec);
180 }
181 }
182
183
184
185
186 static void
187 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
188 void *user_data)
189 {
190 if (rc < pcmk_ok) {
191 crm_err("Fencing update %d for %s: failed - %s (%d)",
192 call_id, (char *)user_data, pcmk_strerror(rc), rc);
193 crm_log_xml_warn(msg, "Failed update");
194 abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
195 NULL);
196
197 } else {
198 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
199 }
200 }
201
202 static void
203 send_stonith_update(pcmk__graph_action_t *action, const char *target,
204 const char *uuid)
205 {
206 int rc = pcmk_ok;
207 crm_node_t *peer = NULL;
208
209
210
211
212
213 int flags = node_update_join | node_update_expected;
214
215
216 xmlNode *node_state = NULL;
217
218 CRM_CHECK(target != NULL, return);
219 CRM_CHECK(uuid != NULL, return);
220
221
222 peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
223
224 CRM_CHECK(peer != NULL, return);
225
226 if (peer->state == NULL) {
227
228
229
230
231 flags |= node_update_cluster;
232 }
233
234 if (peer->uuid == NULL) {
235 crm_info("Recording uuid '%s' for node '%s'", uuid, target);
236 peer->uuid = strdup(uuid);
237 }
238
239 crmd_peer_down(peer, TRUE);
240
241
242 node_state = create_node_state_update(peer, flags, NULL, __func__);
243
244
245 if (peer->flags & crm_remote_node) {
246 char *now_s = pcmk__ttoa(time(NULL));
247
248 crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
249 free(now_s);
250 }
251
252
253 crm_xml_add(node_state, XML_ATTR_ID, uuid);
254
255 rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
256 XML_CIB_TAG_STATUS, node_state,
257 cib_scope_local
258 |cib_can_create);
259
260
261 crm_debug("Sending fencing update %d for %s", rc, target);
262 fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated);
263
264
265
266
267
268
269 controld_delete_node_state(peer->uname, controld_section_all,
270 cib_scope_local);
271 free_xml(node_state);
272 return;
273 }
274
275
276
277
278
279
280
281
282
283 static void
284 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
285 const char *target, const xmlNode *reason)
286 {
287
288
289
290 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
291 abort_action = pcmk__graph_wait;
292 }
293 abort_transition(INFINITY, abort_action, "Stonith failed", reason);
294 }
295
296
297
298
299
300
301
302
303
304
/* Heap-allocated names of fencing targets that still need to be marked clean;
 * filled by add_stonith_cleanup() and drained by execute_stonith_cleanup()
 */
305 static GList *stonith_cleanup_list = NULL;
306
307
308
309
310
311
312
313 void
314 add_stonith_cleanup(const char *target) {
315 stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
316 }
317
318
319
320
321
322
323
324 void
325 remove_stonith_cleanup(const char *target)
326 {
327 GList *iter = stonith_cleanup_list;
328
329 while (iter != NULL) {
330 GList *tmp = iter;
331 char *iter_name = tmp->data;
332
333 iter = iter->next;
334 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
335 crm_trace("Removing %s from the cleanup list", iter_name);
336 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
337 free(iter_name);
338 }
339 }
340 }
341
342
343
344
345
346 void
347 purge_stonith_cleanup(void)
348 {
349 if (stonith_cleanup_list) {
350 GList *iter = NULL;
351
352 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
353 char *target = iter->data;
354
355 crm_info("Purging %s from stonith cleanup list", target);
356 free(target);
357 }
358 g_list_free(stonith_cleanup_list);
359 stonith_cleanup_list = NULL;
360 }
361 }
362
363
364
365
366
367 void
368 execute_stonith_cleanup(void)
369 {
370 GList *iter;
371
372 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
373 char *target = iter->data;
374 crm_node_t *target_node = crm_get_peer(0, target);
375 const char *uuid = crm_peer_uuid(target_node);
376
377 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
378 send_stonith_update(NULL, target, uuid);
379 free(target);
380 }
381 g_list_free(stonith_cleanup_list);
382 stonith_cleanup_list = NULL;
383 }
384
385
386
387
388
389
390
391
392
/* Fencer API object; created on demand by te_connect_stonith() and freed by
 * controld_disconnect_fencer(true)
 */
393 static stonith_t *stonith_api = NULL;
/* Mainloop trigger that runs te_connect_stonith(); created by
 * controld_trigger_fencer_connect()
 */
394 static crm_trigger_t *stonith_reconnect = NULL;
/* "<crm_system_name>.<pid>" string built in handle_fence_notification() and
 * compared there against the client named in a fencing event
 */
395 static char *te_client_id = NULL;
396
397 static gboolean
398 fail_incompletable_stonith(pcmk__graph_t *graph)
399 {
400 GList *lpc = NULL;
401 const char *task = NULL;
402 xmlNode *last_action = NULL;
403
404 if (graph == NULL) {
405 return FALSE;
406 }
407
408 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
409 GList *lpc2 = NULL;
410 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
411
412 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
413 continue;
414 }
415
416 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
417 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
418
419 if ((action->type != pcmk__cluster_graph_action)
420 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
421 continue;
422 }
423
424 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
425 if (task && pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
426 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
427 last_action = action->xml;
428 pcmk__update_graph(graph, action);
429 crm_notice("Failing action %d (%s): fencer terminated",
430 action->id, ID(action->xml));
431 }
432 }
433 }
434
435 if (last_action != NULL) {
436 crm_warn("Fencer failure resulted in unrunnable actions");
437 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
438 return TRUE;
439 }
440
441 return FALSE;
442 }
443
444 static void
445 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
446 {
447 te_cleanup_stonith_history_sync(st, FALSE);
448
449 if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
450 crm_crit("Fencing daemon connection failed");
451 mainloop_set_trigger(stonith_reconnect);
452
453 } else {
454 crm_info("Fencing daemon disconnected");
455 }
456
457 if (stonith_api) {
458
459
460
461 if (stonith_api->state != stonith_disconnected) {
462 stonith_api->cmds->disconnect(st);
463 }
464 stonith_api->cmds->remove_notification(stonith_api, NULL);
465 }
466
467 if (AM_I_DC) {
468 fail_incompletable_stonith(controld_globals.transition_graph);
469 trigger_graph();
470 }
471 }
472
473
474
475
476
477
478
479
480 static void
481 handle_fence_notification(stonith_t *st, stonith_event_t *event)
482 {
483 bool succeeded = true;
484 const char *executioner = "the cluster";
485 const char *client = "a client";
486 const char *reason = NULL;
487 int exec_status;
488
489 if (te_client_id == NULL) {
490 te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
491 (unsigned long) getpid());
492 }
493
494 if (event == NULL) {
495 crm_err("Notify data not found");
496 return;
497 }
498
499 if (event->executioner != NULL) {
500 executioner = event->executioner;
501 }
502 if (event->client_origin != NULL) {
503 client = event->client_origin;
504 }
505
506 exec_status = stonith__event_execution_status(event);
507 if ((stonith__event_exit_status(event) != CRM_EX_OK)
508 || (exec_status != PCMK_EXEC_DONE)) {
509 succeeded = false;
510 if (exec_status == PCMK_EXEC_DONE) {
511 exec_status = PCMK_EXEC_ERROR;
512 }
513 }
514 reason = stonith__event_exit_reason(event);
515
516 crmd_alert_fencing_op(event);
517
518 if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
519
520 if (succeeded) {
521 crm_notice("%s was unfenced by %s at the request of %s@%s",
522 event->target, executioner, client, event->origin);
523 } else {
524 crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
525 event->target, executioner,
526 pcmk_exec_status_str(exec_status),
527 ((reason == NULL)? "" : ": "),
528 ((reason == NULL)? "" : reason),
529 stonith__event_exit_status(event));
530 }
531 return;
532 }
533
534 if (succeeded
535 && pcmk__str_eq(event->target, controld_globals.our_nodename,
536 pcmk__str_casei)) {
537
538
539
540
541
542
543
544
545
546 crm_crit("We were allegedly just fenced by %s for %s!",
547 executioner, event->origin);
548 if (fence_reaction_panic) {
549 pcmk__panic(__func__);
550 } else {
551 crm_exit(CRM_EX_FATAL);
552 }
553 return;
554 }
555
556
557
558
559
560 if (!AM_I_DC) {
561 if (succeeded) {
562 st_fail_count_reset(event->target);
563 } else {
564 st_fail_count_increment(event->target);
565 }
566 }
567
568 crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
569 "%s%s%s%s " CRM_XS " event=%s",
570 event->target, (succeeded? "" : " not"),
571 event->action, executioner, client, event->origin,
572 (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
573 ((reason == NULL)? "" : " ("),
574 ((reason == NULL)? "" : reason),
575 ((reason == NULL)? "" : ")"),
576 event->id);
577
578 if (succeeded) {
579 crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
580 CRM_GET_PEER_ANY);
581 const char *uuid = NULL;
582
583 if (peer == NULL) {
584 return;
585 }
586
587 uuid = crm_peer_uuid(peer);
588
589 if (AM_I_DC) {
590
591 send_stonith_update(NULL, event->target, uuid);
592
593
594
595
596
597
598
599
600
601 if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
602
603
604
605 crm_info("External fencing operation from %s fenced %s",
606 client, event->target);
607 abort_transition(INFINITY, pcmk__graph_restart,
608 "External Fencing Operation", NULL);
609 }
610
611 } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
612 pcmk__str_null_matches|pcmk__str_casei)
613 && !pcmk_is_set(peer->flags, crm_remote_node)) {
614
615
616 if (controld_globals.dc_name != NULL) {
617 crm_notice("Fencing target %s was our DC", event->target);
618 } else {
619 crm_notice("Fencing target %s may have been our DC",
620 event->target);
621 }
622
623
624
625
626
627 if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
628 pcmk__str_casei)) {
629 send_stonith_update(NULL, event->target, uuid);
630 }
631 add_stonith_cleanup(event->target);
632 }
633
634
635
636
637
638
639 if (pcmk_is_set(peer->flags, crm_remote_node)) {
640 remote_ra_fail(event->target);
641 }
642
643 crmd_peer_down(peer, TRUE);
644 }
645 }
646
647
648
649
650
651
652
653
654
655
656 static gboolean
657 te_connect_stonith(gpointer user_data)
658 {
659 int rc = pcmk_ok;
660
661 if (stonith_api == NULL) {
662 stonith_api = stonith_api_new();
663 if (stonith_api == NULL) {
664 crm_err("Could not connect to fencer: API memory allocation failed");
665 return TRUE;
666 }
667 }
668
669 if (stonith_api->state != stonith_disconnected) {
670 crm_trace("Already connected to fencer, no need to retry");
671 return TRUE;
672 }
673
674 if (user_data == NULL) {
675
676 rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
677 if (rc != pcmk_ok) {
678 crm_err("Could not connect to fencer in 30 attempts: %s "
679 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
680 }
681 } else {
682
683 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
684 if (rc != pcmk_ok) {
685 if (pcmk_is_set(controld_globals.fsa_input_register,
686 R_ST_REQUIRED)) {
687 crm_notice("Fencer connection failed (will retry): %s "
688 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
689 mainloop_set_trigger(stonith_reconnect);
690 } else {
691 crm_info("Fencer connection failed (ignoring because no longer required): %s "
692 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
693 }
694 return TRUE;
695 }
696 }
697
698 if (rc == pcmk_ok) {
699 stonith_api->cmds->register_notification(stonith_api,
700 T_STONITH_NOTIFY_DISCONNECT,
701 tengine_stonith_connection_destroy);
702 stonith_api->cmds->register_notification(stonith_api,
703 T_STONITH_NOTIFY_FENCE,
704 handle_fence_notification);
705 stonith_api->cmds->register_notification(stonith_api,
706 T_STONITH_NOTIFY_HISTORY_SYNCED,
707 tengine_stonith_history_synced);
708 te_trigger_stonith_history_sync(TRUE);
709 crm_notice("Fencer successfully connected");
710 }
711
712 return TRUE;
713 }
714
715
716
717
718
719 void
720 controld_trigger_fencer_connect(void)
721 {
722 if (stonith_reconnect == NULL) {
723 stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
724 te_connect_stonith,
725 GINT_TO_POINTER(TRUE));
726 }
727 controld_set_fsa_input_flags(R_ST_REQUIRED);
728 mainloop_set_trigger(stonith_reconnect);
729 }
730
731 void
732 controld_disconnect_fencer(bool destroy)
733 {
734 if (stonith_api) {
735
736 controld_clear_fsa_input_flags(R_ST_REQUIRED);
737
738 if (stonith_api->state != stonith_disconnected) {
739 stonith_api->cmds->disconnect(stonith_api);
740 }
741 stonith_api->cmds->remove_notification(stonith_api, NULL);
742 }
743 if (destroy) {
744 if (stonith_api) {
745 stonith_api->cmds->free(stonith_api);
746 stonith_api = NULL;
747 }
748 if (stonith_reconnect) {
749 mainloop_destroy_trigger(stonith_reconnect);
750 stonith_reconnect = NULL;
751 }
752 if (te_client_id) {
753 free(te_client_id);
754 te_client_id = NULL;
755 }
756 }
757 }
758
759 static gboolean
760 do_stonith_history_sync(gpointer user_data)
761 {
762 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
763 stonith_history_t *history = NULL;
764
765 te_cleanup_stonith_history_sync(stonith_api, FALSE);
766 stonith_api->cmds->history(stonith_api,
767 st_opt_sync_call | st_opt_broadcast,
768 NULL, &history, 5);
769 stonith_history_free(history);
770 return TRUE;
771 } else {
772 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
773 return FALSE;
774 }
775 }
776
777 static void
778 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
779 {
780 char *uuid = NULL;
781 int stonith_id = -1;
782 int transition_id = -1;
783 pcmk__graph_action_t *action = NULL;
784 const char *target = NULL;
785
786 if ((data == NULL) || (data->userdata == NULL)) {
787 crm_err("Ignoring fence operation %d result: "
788 "No transition key given (bug?)",
789 ((data == NULL)? -1 : data->call_id));
790 return;
791 }
792
793 if (!AM_I_DC) {
794 const char *reason = stonith__exit_reason(data);
795
796 if (reason == NULL) {
797 reason = pcmk_exec_status_str(stonith__execution_status(data));
798 }
799 crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
800 data->call_id, stonith__exit_status(data), reason,
801 (const char *) data->userdata);
802 return;
803 }
804
805 CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
806 &stonith_id, NULL),
807 goto bail);
808
809 if (controld_globals.transition_graph->complete || (stonith_id < 0)
810 || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
811 || (controld_globals.transition_graph->id != transition_id)) {
812 crm_info("Ignoring fence operation %d result: "
813 "Not from current transition " CRM_XS
814 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
815 data->call_id,
816 pcmk__btoa(controld_globals.transition_graph->complete),
817 stonith_id, uuid, controld_globals.te_uuid, transition_id,
818 controld_globals.transition_graph->id);
819 goto bail;
820 }
821
822 action = controld_get_action(stonith_id);
823 if (action == NULL) {
824 crm_err("Ignoring fence operation %d result: "
825 "Action %d not found in transition graph (bug?) "
826 CRM_XS " uuid=%s transition=%d",
827 data->call_id, stonith_id, uuid, transition_id);
828 goto bail;
829 }
830
831 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
832 if (target == NULL) {
833 crm_err("Ignoring fence operation %d result: No target given (bug?)",
834 data->call_id);
835 goto bail;
836 }
837
838 stop_te_timer(action);
839 if (stonith__exit_status(data) == CRM_EX_OK) {
840 const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
841 const char *op = crm_meta_value(action->params, "stonith_action");
842
843 crm_info("Fence operation %d for %s succeeded", data->call_id, target);
844 if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
845 te_action_confirmed(action, NULL);
846 if (pcmk__str_eq("on", op, pcmk__str_casei)) {
847 const char *value = NULL;
848 char *now = pcmk__ttoa(time(NULL));
849 gboolean is_remote_node = FALSE;
850
851
852
853
854
855
856
857
858 if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
859 is_remote_node = TRUE;
860 }
861
862 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
863 is_remote_node);
864 free(now);
865
866 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
867 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
868 is_remote_node);
869
870 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
871 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
872 is_remote_node);
873
874 } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
875 send_stonith_update(action, target, uuid);
876 pcmk__set_graph_action_flags(action,
877 pcmk__graph_action_sent_update);
878 }
879 }
880 st_fail_count_reset(target);
881
882 } else {
883 enum pcmk__graph_next abort_action = pcmk__graph_restart;
884 int status = stonith__execution_status(data);
885 const char *reason = stonith__exit_reason(data);
886
887 if (reason == NULL) {
888 if (status == PCMK_EXEC_DONE) {
889 reason = "Agent returned error";
890 } else {
891 reason = pcmk_exec_status_str(status);
892 }
893 }
894 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
895
896
897
898
899 if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
900 crm_warn("Fence operation %d for %s failed: %s "
901 "(aborting transition and giving up for now)",
902 data->call_id, target, reason);
903 abort_action = pcmk__graph_wait;
904 } else {
905 crm_notice("Fence operation %d for %s failed: %s "
906 "(aborting transition)", data->call_id, target, reason);
907 }
908
909
910
911
912
913 st_fail_count_increment(target);
914 abort_for_stonith_failure(abort_action, target, NULL);
915 }
916
917 pcmk__update_graph(controld_globals.transition_graph, action);
918 trigger_graph();
919
920 bail:
921 free(data->userdata);
922 free(uuid);
923 return;
924 }
925
926 static int
927 fence_with_delay(const char *target, const char *type, int delay)
928 {
929 uint32_t options = st_opt_none;
930 int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
931 / 1000);
932
933 if (crmd_join_phase_count(crm_join_confirmed) == 1) {
934 stonith__set_call_options(options, target, st_opt_allow_suicide);
935 }
936 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
937 type, timeout_sec, 0, delay);
938 }
939
940
941
942
943
944
945
946
947
948
949 int
950 controld_execute_fence_action(pcmk__graph_t *graph,
951 pcmk__graph_action_t *action)
952 {
953 int rc = 0;
954 const char *id = ID(action->xml);
955 const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
956 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
957 const char *type = crm_meta_value(action->params, "stonith_action");
958 char *transition_key = NULL;
959 const char *priority_delay = NULL;
960 int delay_i = 0;
961 gboolean invalid_action = FALSE;
962 int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
963 / 1000);
964
965 CRM_CHECK(id != NULL, invalid_action = TRUE);
966 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
967 CRM_CHECK(type != NULL, invalid_action = TRUE);
968 CRM_CHECK(target != NULL, invalid_action = TRUE);
969
970 if (invalid_action) {
971 crm_log_xml_warn(action->xml, "BadAction");
972 return EPROTO;
973 }
974
975 priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
976
977 crm_notice("Requesting fencing (%s) targeting node %s "
978 CRM_XS " action=%s timeout=%i%s%s",
979 type, target, id, stonith_timeout,
980 priority_delay ? " priority_delay=" : "",
981 priority_delay ? priority_delay : "");
982
983
984 te_connect_stonith(NULL);
985
986 pcmk__scan_min_int(priority_delay, &delay_i, 0);
987 rc = fence_with_delay(target, type, delay_i);
988 transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
989 action->id, 0,
990 controld_globals.te_uuid),
991 stonith_api->cmds->register_callback(stonith_api, rc,
992 (stonith_timeout
993 + (delay_i > 0 ? delay_i : 0)),
994 st_opt_timeout_updates, transition_key,
995 "tengine_stonith_callback",
996 tengine_stonith_callback);
997 return pcmk_rc_ok;
998 }
999
1000 bool
1001 controld_verify_stonith_watchdog_timeout(const char *value)
1002 {
1003 const char *our_nodename = controld_globals.our_nodename;
1004 gboolean rv = TRUE;
1005
1006 if (stonith_api && (stonith_api->state != stonith_disconnected) &&
1007 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
1008 our_nodename)) {
1009 rv = pcmk__valid_sbd_timeout(value);
1010 }
1011 return rv;
1012 }
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
/* Mainloop trigger that runs do_stonith_history_sync(); created on first use
 * by te_trigger_stonith_history_sync()
 */
1024 static crm_trigger_t *stonith_history_sync_trigger = NULL;
/* Timer created with an interval of 5000 (logged as "within 5 seconds") */
1025 static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
/* Timer created with an interval of 30000 (logged as "within 30 seconds") */
1026 static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
1027
1028 void
1029 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
1030 {
1031 if (free_timers) {
1032 mainloop_timer_del(stonith_history_sync_timer_short);
1033 stonith_history_sync_timer_short = NULL;
1034 mainloop_timer_del(stonith_history_sync_timer_long);
1035 stonith_history_sync_timer_long = NULL;
1036 } else {
1037 mainloop_timer_stop(stonith_history_sync_timer_short);
1038 mainloop_timer_stop(stonith_history_sync_timer_long);
1039 }
1040
1041 if (st) {
1042 st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
1043 }
1044 }
1045
1046 static void
1047 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1048 {
1049 te_cleanup_stonith_history_sync(st, FALSE);
1050 crm_debug("Fence-history synced - cancel all timers");
1051 }
1052
1053 static gboolean
1054 stonith_history_sync_set_trigger(gpointer user_data)
1055 {
1056 mainloop_set_trigger(stonith_history_sync_trigger);
1057 return FALSE;
1058 }
1059
1060 void
1061 te_trigger_stonith_history_sync(bool long_timeout)
1062 {
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 if (stonith_history_sync_trigger == NULL) {
1081 stonith_history_sync_trigger =
1082 mainloop_add_trigger(G_PRIORITY_LOW,
1083 do_stonith_history_sync, NULL);
1084 }
1085
1086 if (long_timeout) {
1087 if(stonith_history_sync_timer_long == NULL) {
1088 stonith_history_sync_timer_long =
1089 mainloop_timer_add("history_sync_long", 30000,
1090 FALSE, stonith_history_sync_set_trigger,
1091 NULL);
1092 }
1093 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1094 mainloop_timer_start(stonith_history_sync_timer_long);
1095 } else {
1096 if(stonith_history_sync_timer_short == NULL) {
1097 stonith_history_sync_timer_short =
1098 mainloop_timer_add("history_sync_short", 5000,
1099 FALSE, stonith_history_sync_set_trigger,
1100 NULL);
1101 }
1102 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1103 mainloop_timer_start(stonith_history_sync_timer_short);
1104 }
1105
1106 }
1107
1108