This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- controld_configure_fencing
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- send_stonith_update
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- controld_timer_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/msg_xml.h>
13 #include <crm/common/xml.h>
14 #include <crm/stonith-ng.h>
15 #include <crm/fencing/internal.h>
16
17 #include <pacemaker-controld.h>
18
/* Forward declaration: this handler is registered as a fencer notification
 * callback in controld_timer_fencer_connect(), which appears before the
 * handler's definition later in this file.
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
21
22
23
24
25
26
27
28
29
/* Per-target fencing failure record, stored as the value in the
 * stonith_failures hash table (keyed by target name).
 */
struct st_fail_rec {
    int count;  // Number of failed fencing attempts since the last reset
};
33
// Whether to panic (true) or exit (false) when notified of our own fencing
static bool fence_reaction_panic = false;

// Per-target failure count at which too_many_st_failures() gives up
static unsigned long int stonith_max_attempts = 10;

// Target name -> struct st_fail_rec; created lazily on the first failure
static GHashTable *stonith_failures = NULL;
37
38
39
40
41
42
43
44 static void
45 update_stonith_max_attempts(const char *value)
46 {
47 stonith_max_attempts = char2score(value);
48 if (stonith_max_attempts < 1UL) {
49 stonith_max_attempts = 10UL;
50 }
51 }
52
53
54
55
56
57
58
59 static void
60 set_fence_reaction(const char *reaction_s)
61 {
62 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
63 fence_reaction_panic = true;
64
65 } else {
66 if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) {
67 crm_warn("Invalid value '%s' for %s, using 'stop'",
68 reaction_s, XML_CONFIG_ATTR_FENCE_REACTION);
69 }
70 fence_reaction_panic = false;
71 }
72 }
73
74
75
76
77
78
79
80 void
81 controld_configure_fencing(GHashTable *options)
82 {
83 const char *value = NULL;
84
85 value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION);
86 set_fence_reaction(value);
87
88 value = g_hash_table_lookup(options, "stonith-max-attempts");
89 update_stonith_max_attempts(value);
90 }
91
92 static gboolean
93 too_many_st_failures(const char *target)
94 {
95 GHashTableIter iter;
96 const char *key = NULL;
97 struct st_fail_rec *value = NULL;
98
99 if (stonith_failures == NULL) {
100 return FALSE;
101 }
102
103 if (target == NULL) {
104 g_hash_table_iter_init(&iter, stonith_failures);
105 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
106 (gpointer *) &value)) {
107
108 if (value->count >= stonith_max_attempts) {
109 target = (const char*)key;
110 goto too_many;
111 }
112 }
113 } else {
114 value = g_hash_table_lookup(stonith_failures, target);
115 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
116 goto too_many;
117 }
118 }
119 return FALSE;
120
121 too_many:
122 crm_warn("Too many failures (%d) to fence %s, giving up",
123 value->count, target);
124 return TRUE;
125 }
126
127
128
129
130
131
132
133 void
134 st_fail_count_reset(const char *target)
135 {
136 if (stonith_failures == NULL) {
137 return;
138 }
139
140 if (target) {
141 struct st_fail_rec *rec = NULL;
142
143 rec = g_hash_table_lookup(stonith_failures, target);
144 if (rec) {
145 rec->count = 0;
146 }
147 } else {
148 GHashTableIter iter;
149 const char *key = NULL;
150 struct st_fail_rec *rec = NULL;
151
152 g_hash_table_iter_init(&iter, stonith_failures);
153 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
154 (gpointer *) &rec)) {
155 rec->count = 0;
156 }
157 }
158 }
159
160 static void
161 st_fail_count_increment(const char *target)
162 {
163 struct st_fail_rec *rec = NULL;
164
165 if (stonith_failures == NULL) {
166 stonith_failures = pcmk__strkey_table(free, free);
167 }
168
169 rec = g_hash_table_lookup(stonith_failures, target);
170 if (rec) {
171 rec->count++;
172 } else {
173 rec = malloc(sizeof(struct st_fail_rec));
174 if(rec == NULL) {
175 return;
176 }
177
178 rec->count = 1;
179 g_hash_table_insert(stonith_failures, strdup(target), rec);
180 }
181 }
182
183
184
185
186 static void
187 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
188 void *user_data)
189 {
190 if (rc < pcmk_ok) {
191 crm_err("Fencing update %d for %s: failed - %s (%d)",
192 call_id, (char *)user_data, pcmk_strerror(rc), rc);
193 crm_log_xml_warn(msg, "Failed update");
194 abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed",
195 NULL);
196
197 } else {
198 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
199 }
200 }
201
202 static void
203 send_stonith_update(pcmk__graph_action_t *action, const char *target,
204 const char *uuid)
205 {
206 int rc = pcmk_ok;
207 crm_node_t *peer = NULL;
208
209
210
211
212
213 int flags = node_update_join | node_update_expected;
214
215
216 xmlNode *node_state = NULL;
217
218 CRM_CHECK(target != NULL, return);
219 CRM_CHECK(uuid != NULL, return);
220
221
222
223
224
225 peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY);
226
227 CRM_CHECK(peer != NULL, return);
228
229 if (peer->state == NULL) {
230
231
232
233
234 flags |= node_update_cluster;
235 }
236
237 if (peer->uuid == NULL) {
238 crm_info("Recording uuid '%s' for node '%s'", uuid, target);
239 peer->uuid = strdup(uuid);
240 }
241
242 crmd_peer_down(peer, TRUE);
243
244
245 node_state = create_node_state_update(peer, flags, NULL, __func__);
246
247
248 if (peer->flags & crm_remote_node) {
249 char *now_s = pcmk__ttoa(time(NULL));
250
251 crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
252 free(now_s);
253 }
254
255
256 crm_xml_add(node_state, XML_ATTR_ID, uuid);
257
258 rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
259 XML_CIB_TAG_STATUS, node_state,
260 cib_scope_local
261 |cib_can_create);
262
263
264 crm_debug("Sending fencing update %d for %s", rc, target);
265 fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated);
266
267
268
269
270
271
272 controld_delete_node_state(peer->uname, controld_section_all,
273 cib_scope_local);
274 free_xml(node_state);
275 return;
276 }
277
278
279
280
281
282
283
284
285
286 static void
287 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
288 const char *target, const xmlNode *reason)
289 {
290
291
292
293 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
294 abort_action = pcmk__graph_wait;
295 }
296 abort_transition(INFINITY, abort_action, "Stonith failed", reason);
297 }
298
299
300
301
302
303
304
305
306
307
/* Names (heap-allocated copies) of fenced targets whose CIB update is
 * deferred until execute_stonith_cleanup() is called
 */
static GList *stonith_cleanup_list = NULL;
309
310
311
312
313
314
315
316 void
317 add_stonith_cleanup(const char *target) {
318 stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
319 }
320
321
322
323
324
325
326
327 void
328 remove_stonith_cleanup(const char *target)
329 {
330 GList *iter = stonith_cleanup_list;
331
332 while (iter != NULL) {
333 GList *tmp = iter;
334 char *iter_name = tmp->data;
335
336 iter = iter->next;
337 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
338 crm_trace("Removing %s from the cleanup list", iter_name);
339 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
340 free(iter_name);
341 }
342 }
343 }
344
345
346
347
348
349 void
350 purge_stonith_cleanup(void)
351 {
352 if (stonith_cleanup_list) {
353 GList *iter = NULL;
354
355 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
356 char *target = iter->data;
357
358 crm_info("Purging %s from stonith cleanup list", target);
359 free(target);
360 }
361 g_list_free(stonith_cleanup_list);
362 stonith_cleanup_list = NULL;
363 }
364 }
365
366
367
368
369
370 void
371 execute_stonith_cleanup(void)
372 {
373 GList *iter;
374
375 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
376 char *target = iter->data;
377 crm_node_t *target_node = crm_get_peer(0, target);
378 const char *uuid = crm_peer_uuid(target_node);
379
380 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
381 send_stonith_update(NULL, target, uuid);
382 free(target);
383 }
384 g_list_free(stonith_cleanup_list);
385 stonith_cleanup_list = NULL;
386 }
387
388
389
390
391
392
393
394
395
// Fencer API object; allocated on demand by controld_timer_fencer_connect()
static stonith_t *stonith_api = NULL;

// Timer used to retry connecting to the fencer
static mainloop_timer_t *controld_fencer_connect_timer = NULL;

/* "<system name>.<pid>" string built in handle_fence_notification() and
 * compared there against the client that requested a fencing operation
 */
static char *te_client_id = NULL;
399
400 static gboolean
401 fail_incompletable_stonith(pcmk__graph_t *graph)
402 {
403 GList *lpc = NULL;
404 const char *task = NULL;
405 xmlNode *last_action = NULL;
406
407 if (graph == NULL) {
408 return FALSE;
409 }
410
411 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
412 GList *lpc2 = NULL;
413 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
414
415 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
416 continue;
417 }
418
419 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
420 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
421
422 if ((action->type != pcmk__cluster_graph_action)
423 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
424 continue;
425 }
426
427 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
428 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
429 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
430 last_action = action->xml;
431 pcmk__update_graph(graph, action);
432 crm_notice("Failing action %d (%s): fencer terminated",
433 action->id, ID(action->xml));
434 }
435 }
436 }
437
438 if (last_action != NULL) {
439 crm_warn("Fencer failure resulted in unrunnable actions");
440 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
441 return TRUE;
442 }
443
444 return FALSE;
445 }
446
447 static void
448 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
449 {
450 te_cleanup_stonith_history_sync(st, FALSE);
451
452 if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
453 crm_err("Lost fencer connection (will attempt to reconnect)");
454 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
455 mainloop_timer_start(controld_fencer_connect_timer);
456 }
457 } else {
458 crm_info("Disconnected from fencer");
459 }
460
461 if (stonith_api) {
462
463
464
465 if (stonith_api->state != stonith_disconnected) {
466 stonith_api->cmds->disconnect(st);
467 }
468 stonith_api->cmds->remove_notification(stonith_api, NULL);
469 }
470
471 if (AM_I_DC) {
472 fail_incompletable_stonith(controld_globals.transition_graph);
473 trigger_graph();
474 }
475 }
476
477
478
479
480
481
482
483
/*!
 * \internal
 * \brief Handle a fencer notification about a completed fencing operation
 *
 * Logs the result, reacts to our own fencing, keeps the local failure count
 * up to date, and after a successful fencing updates the CIB and peer caches
 * as appropriate for this node's role.
 *
 * \param[in] st     Fencer connection (not used by this function)
 * \param[in] event  Fencing event (a NULL event is logged and ignored)
 */
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
    bool succeeded = true;
    const char *executioner = "the cluster";
    const char *client = "a client";
    const char *reason = NULL;
    int exec_status;

    // Build our own client ID once, as "<system name>.<pid>"
    if (te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
                                         (unsigned long) getpid());
    }

    if (event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    // Prefer the names in the event over the generic placeholders
    if (event->executioner != NULL) {
        executioner = event->executioner;
    }
    if (event->client_origin != NULL) {
        client = event->client_origin;
    }

    /* The operation succeeded only if it both ran to completion and exited
     * OK. If it completed with a bad exit status, report it as an error
     * rather than as "done" in the messages below.
     */
    exec_status = stonith__event_execution_status(event);
    if ((stonith__event_exit_status(event) != CRM_EX_OK)
        || (exec_status != PCMK_EXEC_DONE)) {
        succeeded = false;
        if (exec_status == PCMK_EXEC_DONE) {
            exec_status = PCMK_EXEC_ERROR;
        }
    }
    reason = stonith__event_exit_reason(event);

    // NOTE(review): presumably triggers any configured fencing alerts -- confirm
    crmd_alert_fencing_op(event);

    // An "on" action is unfencing: it only needs to be logged
    if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {

        if (succeeded) {
            crm_notice("%s was unfenced by %s at the request of %s@%s",
                       event->target, executioner, client, event->origin);
        } else {
            crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
                    event->target, executioner,
                    pcmk_exec_status_str(exec_status),
                    ((reason == NULL)? "" : ": "),
                    ((reason == NULL)? "" : reason),
                    stonith__event_exit_status(event));
        }
        return;
    }

    /* We are being told that we ourselves were successfully fenced, yet we
     * are still running: react as configured by the fence-reaction option
     * (panic, or exit with a fatal status).
     */
    if (succeeded
        && pcmk__str_eq(event->target, controld_globals.our_nodename,
                        pcmk__str_casei)) {
        crm_crit("We were allegedly just fenced by %s for %s!",
                 executioner, event->origin);
        if (fence_reaction_panic) {
            pcmk__panic(__func__);
        } else {
            crm_exit(CRM_EX_FATAL);
        }
        return;
    }

    /* Track the failure count on non-DC nodes here. On the DC,
     * tengine_stonith_callback() resets or increments the count for the
     * operations that it handles.
     */
    if (!AM_I_DC) {
        if (succeeded) {
            st_fail_count_reset(event->target);
        } else {
            st_fail_count_increment(event->target);
        }
    }

    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
               "%s%s%s%s " CRM_XS " event=%s",
               event->target, (succeeded? "" : " not"),
               event->action, executioner, client, event->origin,
               (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
               ((reason == NULL)? "" : " ("),
               ((reason == NULL)? "" : reason),
               ((reason == NULL)? "" : ")"),
               event->id);

    if (succeeded) {
        crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
                                                         CRM_GET_PEER_ANY);
        const char *uuid = NULL;

        // Nothing more can be done for a target that is not a known node
        if (peer == NULL) {
            return;
        }

        uuid = crm_peer_uuid(peer);

        if (AM_I_DC) {
            // The DC records the fencing in the CIB
            send_stonith_update(NULL, event->target, uuid);

            /* The fencing was requested by some client other than this
             * controller, so the current transition was not expecting it:
             * abort and restart the transition.
             */
            if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
                crm_info("External fencing operation from %s fenced %s",
                         client, event->target);
                abort_transition(INFINITY, pcmk__graph_restart,
                                 "External Fencing Operation", NULL);
            }

        } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
                                pcmk__str_null_matches|pcmk__str_casei)
                   && !pcmk_is_set(peer->flags, crm_remote_node)) {
            /* The target is our DC, or we do not know who the DC is (a NULL
             * DC name also takes this branch), and it is not a remote node
             */
            if (controld_globals.dc_name != NULL) {
                crm_notice("Fencing target %s was our DC", event->target);
            } else {
                crm_notice("Fencing target %s may have been our DC",
                           event->target);
            }

            /* Only the node that executed the fencing sends the CIB update
             * now; every node queues the target for execute_stonith_cleanup()
             */
            if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
                             pcmk__str_casei)) {
                send_stonith_update(NULL, event->target, uuid);
            }
            add_stonith_cleanup(event->target);
        }

        // NOTE(review): presumably fails the remote node's connection resource -- confirm
        if (pcmk_is_set(peer->flags, crm_remote_node)) {
            remote_ra_fail(event->target);
        }

        crmd_peer_down(peer, TRUE);
    }
}
650
651
652
653
654
655
656
657
658
659
660 gboolean
661 controld_timer_fencer_connect(gpointer user_data)
662 {
663 int rc = pcmk_ok;
664
665 if (stonith_api == NULL) {
666 stonith_api = stonith_api_new();
667 if (stonith_api == NULL) {
668 crm_err("Could not connect to fencer: API memory allocation failed");
669 return G_SOURCE_REMOVE;
670 }
671 }
672
673 if (stonith_api->state != stonith_disconnected) {
674 crm_trace("Already connected to fencer, no need to retry");
675 return G_SOURCE_REMOVE;
676 }
677
678 if (user_data == NULL) {
679
680 rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
681 if (rc != pcmk_ok) {
682 crm_err("Could not connect to fencer in 30 attempts: %s "
683 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
684 }
685 } else {
686
687 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
688
689 if (controld_fencer_connect_timer == NULL) {
690 controld_fencer_connect_timer =
691 mainloop_timer_add("controld_fencer_connect", 1000,
692 TRUE, controld_timer_fencer_connect,
693 GINT_TO_POINTER(TRUE));
694 }
695
696 if (rc != pcmk_ok) {
697 if (pcmk_is_set(controld_globals.fsa_input_register,
698 R_ST_REQUIRED)) {
699 crm_notice("Fencer connection failed (will retry): %s "
700 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
701
702 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
703 mainloop_timer_start(controld_fencer_connect_timer);
704 }
705
706 return G_SOURCE_CONTINUE;
707 } else {
708 crm_info("Fencer connection failed (ignoring because no longer required): %s "
709 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
710 }
711 return G_SOURCE_REMOVE;
712 }
713 }
714
715 if (rc == pcmk_ok) {
716 stonith_api->cmds->register_notification(stonith_api,
717 T_STONITH_NOTIFY_DISCONNECT,
718 tengine_stonith_connection_destroy);
719 stonith_api->cmds->register_notification(stonith_api,
720 T_STONITH_NOTIFY_FENCE,
721 handle_fence_notification);
722 stonith_api->cmds->register_notification(stonith_api,
723 T_STONITH_NOTIFY_HISTORY_SYNCED,
724 tengine_stonith_history_synced);
725 te_trigger_stonith_history_sync(TRUE);
726 crm_notice("Fencer successfully connected");
727 }
728
729 return G_SOURCE_REMOVE;
730 }
731
732 void
733 controld_disconnect_fencer(bool destroy)
734 {
735 if (stonith_api) {
736
737 controld_clear_fsa_input_flags(R_ST_REQUIRED);
738
739 if (stonith_api->state != stonith_disconnected) {
740 stonith_api->cmds->disconnect(stonith_api);
741 }
742 stonith_api->cmds->remove_notification(stonith_api, NULL);
743 }
744 if (destroy) {
745 if (stonith_api) {
746 stonith_api->cmds->free(stonith_api);
747 stonith_api = NULL;
748 }
749 if (controld_fencer_connect_timer) {
750 mainloop_timer_del(controld_fencer_connect_timer);
751 controld_fencer_connect_timer = NULL;
752 }
753 if (te_client_id) {
754 free(te_client_id);
755 te_client_id = NULL;
756 }
757 }
758 }
759
760 static gboolean
761 do_stonith_history_sync(gpointer user_data)
762 {
763 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
764 stonith_history_t *history = NULL;
765
766 te_cleanup_stonith_history_sync(stonith_api, FALSE);
767 stonith_api->cmds->history(stonith_api,
768 st_opt_sync_call | st_opt_broadcast,
769 NULL, &history, 5);
770 stonith_history_free(history);
771 return TRUE;
772 } else {
773 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
774 return FALSE;
775 }
776 }
777
778 static void
779 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
780 {
781 char *uuid = NULL;
782 int stonith_id = -1;
783 int transition_id = -1;
784 pcmk__graph_action_t *action = NULL;
785 const char *target = NULL;
786
787 if ((data == NULL) || (data->userdata == NULL)) {
788 crm_err("Ignoring fence operation %d result: "
789 "No transition key given (bug?)",
790 ((data == NULL)? -1 : data->call_id));
791 return;
792 }
793
794 if (!AM_I_DC) {
795 const char *reason = stonith__exit_reason(data);
796
797 if (reason == NULL) {
798 reason = pcmk_exec_status_str(stonith__execution_status(data));
799 }
800 crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
801 data->call_id, stonith__exit_status(data), reason,
802 (const char *) data->userdata);
803 return;
804 }
805
806 CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
807 &stonith_id, NULL),
808 goto bail);
809
810 if (controld_globals.transition_graph->complete || (stonith_id < 0)
811 || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
812 || (controld_globals.transition_graph->id != transition_id)) {
813 crm_info("Ignoring fence operation %d result: "
814 "Not from current transition " CRM_XS
815 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
816 data->call_id,
817 pcmk__btoa(controld_globals.transition_graph->complete),
818 stonith_id, uuid, controld_globals.te_uuid, transition_id,
819 controld_globals.transition_graph->id);
820 goto bail;
821 }
822
823 action = controld_get_action(stonith_id);
824 if (action == NULL) {
825 crm_err("Ignoring fence operation %d result: "
826 "Action %d not found in transition graph (bug?) "
827 CRM_XS " uuid=%s transition=%d",
828 data->call_id, stonith_id, uuid, transition_id);
829 goto bail;
830 }
831
832 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
833 if (target == NULL) {
834 crm_err("Ignoring fence operation %d result: No target given (bug?)",
835 data->call_id);
836 goto bail;
837 }
838
839 stop_te_timer(action);
840 if (stonith__exit_status(data) == CRM_EX_OK) {
841 const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
842 const char *op = crm_meta_value(action->params, "stonith_action");
843
844 crm_info("Fence operation %d for %s succeeded", data->call_id, target);
845 if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
846 te_action_confirmed(action, NULL);
847 if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
848 const char *value = NULL;
849 char *now = pcmk__ttoa(time(NULL));
850 gboolean is_remote_node = FALSE;
851
852
853
854
855
856
857
858
859 if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
860 is_remote_node = TRUE;
861 }
862
863 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
864 is_remote_node);
865 free(now);
866
867 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
868 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
869 is_remote_node);
870
871 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
872 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
873 is_remote_node);
874
875 } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
876 send_stonith_update(action, target, uuid);
877 pcmk__set_graph_action_flags(action,
878 pcmk__graph_action_sent_update);
879 }
880 }
881 st_fail_count_reset(target);
882
883 } else {
884 enum pcmk__graph_next abort_action = pcmk__graph_restart;
885 int status = stonith__execution_status(data);
886 const char *reason = stonith__exit_reason(data);
887
888 if (reason == NULL) {
889 if (status == PCMK_EXEC_DONE) {
890 reason = "Agent returned error";
891 } else {
892 reason = pcmk_exec_status_str(status);
893 }
894 }
895 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
896
897
898
899
900 if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
901 crm_warn("Fence operation %d for %s failed: %s "
902 "(aborting transition and giving up for now)",
903 data->call_id, target, reason);
904 abort_action = pcmk__graph_wait;
905 } else {
906 crm_notice("Fence operation %d for %s failed: %s "
907 "(aborting transition)", data->call_id, target, reason);
908 }
909
910
911
912
913
914 st_fail_count_increment(target);
915 abort_for_stonith_failure(abort_action, target, NULL);
916 }
917
918 pcmk__update_graph(controld_globals.transition_graph, action);
919 trigger_graph();
920
921 bail:
922 free(data->userdata);
923 free(uuid);
924 return;
925 }
926
927 static int
928 fence_with_delay(const char *target, const char *type, int delay)
929 {
930 uint32_t options = st_opt_none;
931 int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
932 / 1000);
933
934 if (crmd_join_phase_count(crm_join_confirmed) == 1) {
935 stonith__set_call_options(options, target, st_opt_allow_suicide);
936 }
937 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
938 type, timeout_sec, 0, delay);
939 }
940
941
942
943
944
945
946
947
948
949
950 int
951 controld_execute_fence_action(pcmk__graph_t *graph,
952 pcmk__graph_action_t *action)
953 {
954 int rc = 0;
955 const char *id = ID(action->xml);
956 const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
957 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
958 const char *type = crm_meta_value(action->params, "stonith_action");
959 char *transition_key = NULL;
960 const char *priority_delay = NULL;
961 int delay_i = 0;
962 gboolean invalid_action = FALSE;
963 int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
964 / 1000);
965
966 CRM_CHECK(id != NULL, invalid_action = TRUE);
967 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
968 CRM_CHECK(type != NULL, invalid_action = TRUE);
969 CRM_CHECK(target != NULL, invalid_action = TRUE);
970
971 if (invalid_action) {
972 crm_log_xml_warn(action->xml, "BadAction");
973 return EPROTO;
974 }
975
976 priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
977
978 crm_notice("Requesting fencing (%s) targeting node %s "
979 CRM_XS " action=%s timeout=%i%s%s",
980 type, target, id, stonith_timeout,
981 priority_delay ? " priority_delay=" : "",
982 priority_delay ? priority_delay : "");
983
984
985 controld_timer_fencer_connect(NULL);
986
987 pcmk__scan_min_int(priority_delay, &delay_i, 0);
988 rc = fence_with_delay(target, type, delay_i);
989 transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
990 action->id, 0,
991 controld_globals.te_uuid),
992 stonith_api->cmds->register_callback(stonith_api, rc,
993 (stonith_timeout
994 + (delay_i > 0 ? delay_i : 0)),
995 st_opt_timeout_updates, transition_key,
996 "tengine_stonith_callback",
997 tengine_stonith_callback);
998 return pcmk_rc_ok;
999 }
1000
1001 bool
1002 controld_verify_stonith_watchdog_timeout(const char *value)
1003 {
1004 long st_timeout = value? crm_get_msec(value) : 0;
1005 const char *our_nodename = controld_globals.our_nodename;
1006 gboolean rv = TRUE;
1007
1008 if (st_timeout == 0
1009 || (stonith_api && (stonith_api->state != stonith_disconnected) &&
1010 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
1011 our_nodename))) {
1012 rv = pcmk__valid_sbd_timeout(value);
1013 }
1014 return rv;
1015 }
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
// Mainloop trigger that runs do_stonith_history_sync() when set
static crm_trigger_t *stonith_history_sync_trigger = NULL;

// Timers that set the trigger above; both are created on first use
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
1030
1031 void
1032 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
1033 {
1034 if (free_timers) {
1035 mainloop_timer_del(stonith_history_sync_timer_short);
1036 stonith_history_sync_timer_short = NULL;
1037 mainloop_timer_del(stonith_history_sync_timer_long);
1038 stonith_history_sync_timer_long = NULL;
1039 } else {
1040 mainloop_timer_stop(stonith_history_sync_timer_short);
1041 mainloop_timer_stop(stonith_history_sync_timer_long);
1042 }
1043
1044 if (st) {
1045 st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
1046 }
1047 }
1048
1049 static void
1050 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1051 {
1052 te_cleanup_stonith_history_sync(st, FALSE);
1053 crm_debug("Fence-history synced - cancel all timers");
1054 }
1055
1056 static gboolean
1057 stonith_history_sync_set_trigger(gpointer user_data)
1058 {
1059 mainloop_set_trigger(stonith_history_sync_trigger);
1060 return FALSE;
1061 }
1062
1063 void
1064 te_trigger_stonith_history_sync(bool long_timeout)
1065 {
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083 if (stonith_history_sync_trigger == NULL) {
1084 stonith_history_sync_trigger =
1085 mainloop_add_trigger(G_PRIORITY_LOW,
1086 do_stonith_history_sync, NULL);
1087 }
1088
1089 if (long_timeout) {
1090 if(stonith_history_sync_timer_long == NULL) {
1091 stonith_history_sync_timer_long =
1092 mainloop_timer_add("history_sync_long", 30000,
1093 FALSE, stonith_history_sync_set_trigger,
1094 NULL);
1095 }
1096 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1097 mainloop_timer_start(stonith_history_sync_timer_long);
1098 } else {
1099 if(stonith_history_sync_timer_short == NULL) {
1100 stonith_history_sync_timer_short =
1101 mainloop_timer_add("history_sync_short", 5000,
1102 FALSE, stonith_history_sync_set_trigger,
1103 NULL);
1104 }
1105 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1106 mainloop_timer_start(stonith_history_sync_timer_short);
1107 }
1108
1109 }
1110
1111