This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- controld_configure_fencing
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- send_stonith_update
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- controld_timer_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/common/xml.h>
13 #include <crm/stonith-ng.h>
14 #include <crm/fencing/internal.h>
15
16 #include <pacemaker-controld.h>
17
18 static void
19 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
20
21
22
23
24
25
26
27
28
29 struct st_fail_rec {
30 int count;
31 };
32
33 #define DEFAULT_STONITH_MAX_ATTEMPTS 10
34
35 static bool fence_reaction_panic = false;
36 static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;
37 static GHashTable *stonith_failures = NULL;
38
39
40
41
42
43
44
45 static void
46 update_stonith_max_attempts(const char *value)
47 {
48 int score = 0;
49 int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);
50
51
52 CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);
53
54 if (stonith_max_attempts != score) {
55 crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
56 score, stonith_max_attempts);
57 }
58 stonith_max_attempts = score;
59 }
60
61
62
63
64
65
66
67 static void
68 set_fence_reaction(const char *reaction_s)
69 {
70 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
71 fence_reaction_panic = true;
72
73 } else {
74 if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
75 crm_warn("Invalid value '%s' for %s, using 'stop'",
76 reaction_s, PCMK_OPT_FENCE_REACTION);
77 }
78 fence_reaction_panic = false;
79 }
80 }
81
82
83
84
85
86
87
88 void
89 controld_configure_fencing(GHashTable *options)
90 {
91 const char *value = NULL;
92
93 value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
94 set_fence_reaction(value);
95
96 value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
97 update_stonith_max_attempts(value);
98 }
99
100 static gboolean
101 too_many_st_failures(const char *target)
102 {
103 GHashTableIter iter;
104 const char *key = NULL;
105 struct st_fail_rec *value = NULL;
106
107 if (stonith_failures == NULL) {
108 return FALSE;
109 }
110
111 if (target == NULL) {
112 g_hash_table_iter_init(&iter, stonith_failures);
113 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
114 (gpointer *) &value)) {
115
116 if (value->count >= stonith_max_attempts) {
117 target = (const char*)key;
118 goto too_many;
119 }
120 }
121 } else {
122 value = g_hash_table_lookup(stonith_failures, target);
123 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
124 goto too_many;
125 }
126 }
127 return FALSE;
128
129 too_many:
130 crm_warn("Too many failures (%d) to fence %s, giving up",
131 value->count, target);
132 return TRUE;
133 }
134
135
136
137
138
139
140
141 void
142 st_fail_count_reset(const char *target)
143 {
144 if (stonith_failures == NULL) {
145 return;
146 }
147
148 if (target) {
149 struct st_fail_rec *rec = NULL;
150
151 rec = g_hash_table_lookup(stonith_failures, target);
152 if (rec) {
153 rec->count = 0;
154 }
155 } else {
156 GHashTableIter iter;
157 const char *key = NULL;
158 struct st_fail_rec *rec = NULL;
159
160 g_hash_table_iter_init(&iter, stonith_failures);
161 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
162 (gpointer *) &rec)) {
163 rec->count = 0;
164 }
165 }
166 }
167
168 static void
169 st_fail_count_increment(const char *target)
170 {
171 struct st_fail_rec *rec = NULL;
172
173 if (stonith_failures == NULL) {
174 stonith_failures = pcmk__strkey_table(free, free);
175 }
176
177 rec = g_hash_table_lookup(stonith_failures, target);
178 if (rec) {
179 rec->count++;
180 } else {
181 rec = malloc(sizeof(struct st_fail_rec));
182 if(rec == NULL) {
183 return;
184 }
185
186 rec->count = 1;
187 g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
188 }
189 }
190
191
192
193
194 static void
195 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
196 void *user_data)
197 {
198 if (rc < pcmk_ok) {
199 crm_err("Fencing update %d for %s: failed - %s (%d)",
200 call_id, (char *)user_data, pcmk_strerror(rc), rc);
201 crm_log_xml_warn(msg, "Failed update");
202 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
203 "CIB update failed", NULL);
204
205 } else {
206 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
207 }
208 }
209
210 static void
211 send_stonith_update(pcmk__graph_action_t *action, const char *target,
212 const char *uuid)
213 {
214 int rc = pcmk_ok;
215 crm_node_t *peer = NULL;
216
217
218
219
220
221 int flags = node_update_join | node_update_expected;
222
223
224 xmlNode *node_state = NULL;
225
226 CRM_CHECK(target != NULL, return);
227 CRM_CHECK(uuid != NULL, return);
228
229
230
231
232
233 peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
234
235 CRM_CHECK(peer != NULL, return);
236
237 if (peer->state == NULL) {
238
239
240
241
242 flags |= node_update_cluster;
243 }
244
245 if (peer->uuid == NULL) {
246 crm_info("Recording uuid '%s' for node '%s'", uuid, target);
247 peer->uuid = pcmk__str_copy(uuid);
248 }
249
250 crmd_peer_down(peer, TRUE);
251
252
253 node_state = create_node_state_update(peer, flags, NULL, __func__);
254
255
256 if (peer->flags & crm_remote_node) {
257 char *now_s = pcmk__ttoa(time(NULL));
258
259 crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
260 free(now_s);
261 }
262
263
264 crm_xml_add(node_state, PCMK_XA_ID, uuid);
265
266 rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
267 PCMK_XE_STATUS, node_state,
268 cib_scope_local
269 |cib_can_create);
270
271
272 crm_debug("Sending fencing update %d for %s", rc, target);
273 fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
274
275
276
277
278
279
280 controld_delete_node_state(peer->uname, controld_section_all,
281 cib_scope_local);
282 free_xml(node_state);
283 return;
284 }
285
286
287
288
289
290
291
292
293
294 static void
295 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
296 const char *target, const xmlNode *reason)
297 {
298
299
300
301 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
302 abort_action = pcmk__graph_wait;
303 }
304 abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
305 reason);
306 }
307
308
309
310
311
312
313
314
315
316
317 static GList *stonith_cleanup_list = NULL;
318
319
320
321
322
323
324
325 void
326 add_stonith_cleanup(const char *target) {
327 stonith_cleanup_list = g_list_append(stonith_cleanup_list,
328 pcmk__str_copy(target));
329 }
330
331
332
333
334
335
336
337 void
338 remove_stonith_cleanup(const char *target)
339 {
340 GList *iter = stonith_cleanup_list;
341
342 while (iter != NULL) {
343 GList *tmp = iter;
344 char *iter_name = tmp->data;
345
346 iter = iter->next;
347 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
348 crm_trace("Removing %s from the cleanup list", iter_name);
349 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
350 free(iter_name);
351 }
352 }
353 }
354
355
356
357
358
359 void
360 purge_stonith_cleanup(void)
361 {
362 if (stonith_cleanup_list) {
363 GList *iter = NULL;
364
365 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
366 char *target = iter->data;
367
368 crm_info("Purging %s from stonith cleanup list", target);
369 free(target);
370 }
371 g_list_free(stonith_cleanup_list);
372 stonith_cleanup_list = NULL;
373 }
374 }
375
376
377
378
379
380 void
381 execute_stonith_cleanup(void)
382 {
383 GList *iter;
384
385 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
386 char *target = iter->data;
387 crm_node_t *target_node =
388 pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
389 const char *uuid = pcmk__cluster_node_uuid(target_node);
390
391 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
392 send_stonith_update(NULL, target, uuid);
393 free(target);
394 }
395 g_list_free(stonith_cleanup_list);
396 stonith_cleanup_list = NULL;
397 }
398
399
400
401
402
403
404
405
406
407 static stonith_t *stonith_api = NULL;
408 static mainloop_timer_t *controld_fencer_connect_timer = NULL;
409 static char *te_client_id = NULL;
410
411 static gboolean
412 fail_incompletable_stonith(pcmk__graph_t *graph)
413 {
414 GList *lpc = NULL;
415 const char *task = NULL;
416 xmlNode *last_action = NULL;
417
418 if (graph == NULL) {
419 return FALSE;
420 }
421
422 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
423 GList *lpc2 = NULL;
424 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
425
426 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
427 continue;
428 }
429
430 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
431 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
432
433 if ((action->type != pcmk__cluster_graph_action)
434 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
435 continue;
436 }
437
438 task = crm_element_value(action->xml, PCMK_XA_OPERATION);
439 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
440 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
441 last_action = action->xml;
442 pcmk__update_graph(graph, action);
443 crm_notice("Failing action %d (%s): fencer terminated",
444 action->id, pcmk__xe_id(action->xml));
445 }
446 }
447 }
448
449 if (last_action != NULL) {
450 crm_warn("Fencer failure resulted in unrunnable actions");
451 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
452 return TRUE;
453 }
454
455 return FALSE;
456 }
457
458 static void
459 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
460 {
461 te_cleanup_stonith_history_sync(st, FALSE);
462
463 if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
464 crm_err("Lost fencer connection (will attempt to reconnect)");
465 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
466 mainloop_timer_start(controld_fencer_connect_timer);
467 }
468 } else {
469 crm_info("Disconnected from fencer");
470 }
471
472 if (stonith_api) {
473
474
475
476 if (stonith_api->state != stonith_disconnected) {
477 stonith_api->cmds->disconnect(st);
478 }
479 stonith_api->cmds->remove_notification(stonith_api, NULL);
480 }
481
482 if (AM_I_DC) {
483 fail_incompletable_stonith(controld_globals.transition_graph);
484 trigger_graph();
485 }
486 }
487
488
489
490
491
492
493
494
495 static void
496 handle_fence_notification(stonith_t *st, stonith_event_t *event)
497 {
498 bool succeeded = true;
499 const char *executioner = "the cluster";
500 const char *client = "a client";
501 const char *reason = NULL;
502 int exec_status;
503
504 if (te_client_id == NULL) {
505 te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
506 (unsigned long) getpid());
507 }
508
509 if (event == NULL) {
510 crm_err("Notify data not found");
511 return;
512 }
513
514 if (event->executioner != NULL) {
515 executioner = event->executioner;
516 }
517 if (event->client_origin != NULL) {
518 client = event->client_origin;
519 }
520
521 exec_status = stonith__event_execution_status(event);
522 if ((stonith__event_exit_status(event) != CRM_EX_OK)
523 || (exec_status != PCMK_EXEC_DONE)) {
524 succeeded = false;
525 if (exec_status == PCMK_EXEC_DONE) {
526 exec_status = PCMK_EXEC_ERROR;
527 }
528 }
529 reason = stonith__event_exit_reason(event);
530
531 crmd_alert_fencing_op(event);
532
533 if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
534
535 if (succeeded) {
536 crm_notice("%s was unfenced by %s at the request of %s@%s",
537 event->target, executioner, client, event->origin);
538 } else {
539 crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
540 event->target, executioner,
541 pcmk_exec_status_str(exec_status),
542 ((reason == NULL)? "" : ": "),
543 ((reason == NULL)? "" : reason),
544 stonith__event_exit_status(event));
545 }
546 return;
547 }
548
549 if (succeeded
550 && pcmk__str_eq(event->target, controld_globals.our_nodename,
551 pcmk__str_casei)) {
552
553
554
555
556
557
558
559
560
561 crm_crit("We were allegedly just fenced by %s for %s!",
562 executioner, event->origin);
563 if (fence_reaction_panic) {
564 pcmk__panic(__func__);
565 } else {
566 crm_exit(CRM_EX_FATAL);
567 }
568 return;
569 }
570
571
572
573
574
575 if (!AM_I_DC) {
576 if (succeeded) {
577 st_fail_count_reset(event->target);
578 } else {
579 st_fail_count_increment(event->target);
580 }
581 }
582
583 crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
584 "%s%s%s%s " CRM_XS " event=%s",
585 event->target, (succeeded? "" : " not"),
586 event->action, executioner, client, event->origin,
587 (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
588 ((reason == NULL)? "" : " ("),
589 ((reason == NULL)? "" : reason),
590 ((reason == NULL)? "" : ")"),
591 event->id);
592
593 if (succeeded) {
594 const uint32_t flags = pcmk__node_search_any
595 |pcmk__node_search_cluster_cib;
596
597 crm_node_t *peer = pcmk__search_node_caches(0, event->target, flags);
598 const char *uuid = NULL;
599
600 if (peer == NULL) {
601 return;
602 }
603
604 uuid = pcmk__cluster_node_uuid(peer);
605
606 if (AM_I_DC) {
607
608 send_stonith_update(NULL, event->target, uuid);
609
610
611
612
613
614
615
616
617
618 if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
619
620
621
622 crm_info("External fencing operation from %s fenced %s",
623 client, event->target);
624 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
625 "External Fencing Operation", NULL);
626 }
627
628 } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
629 pcmk__str_null_matches|pcmk__str_casei)
630 && !pcmk_is_set(peer->flags, crm_remote_node)) {
631
632
633 if (controld_globals.dc_name != NULL) {
634 crm_notice("Fencing target %s was our DC", event->target);
635 } else {
636 crm_notice("Fencing target %s may have been our DC",
637 event->target);
638 }
639
640
641
642
643
644 if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
645 pcmk__str_casei)) {
646 send_stonith_update(NULL, event->target, uuid);
647 }
648 add_stonith_cleanup(event->target);
649 }
650
651
652
653
654
655
656 if (pcmk_is_set(peer->flags, crm_remote_node)) {
657 remote_ra_fail(event->target);
658 }
659
660 crmd_peer_down(peer, TRUE);
661 }
662 }
663
664
665
666
667
668
669
670
671
672
673 gboolean
674 controld_timer_fencer_connect(gpointer user_data)
675 {
676 int rc = pcmk_ok;
677
678 if (stonith_api == NULL) {
679 stonith_api = stonith_api_new();
680 if (stonith_api == NULL) {
681 crm_err("Could not connect to fencer: API memory allocation failed");
682 return G_SOURCE_REMOVE;
683 }
684 }
685
686 if (stonith_api->state != stonith_disconnected) {
687 crm_trace("Already connected to fencer, no need to retry");
688 return G_SOURCE_REMOVE;
689 }
690
691 if (user_data == NULL) {
692
693 rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
694 if (rc != pcmk_ok) {
695 crm_err("Could not connect to fencer in 30 attempts: %s "
696 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
697 }
698 } else {
699
700 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
701
702 if (controld_fencer_connect_timer == NULL) {
703 controld_fencer_connect_timer =
704 mainloop_timer_add("controld_fencer_connect", 1000,
705 TRUE, controld_timer_fencer_connect,
706 GINT_TO_POINTER(TRUE));
707 }
708
709 if (rc != pcmk_ok) {
710 if (pcmk_is_set(controld_globals.fsa_input_register,
711 R_ST_REQUIRED)) {
712 crm_notice("Fencer connection failed (will retry): %s "
713 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
714
715 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
716 mainloop_timer_start(controld_fencer_connect_timer);
717 }
718
719 return G_SOURCE_CONTINUE;
720 } else {
721 crm_info("Fencer connection failed (ignoring because no longer required): %s "
722 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
723 }
724 return G_SOURCE_REMOVE;
725 }
726 }
727
728 if (rc == pcmk_ok) {
729 stonith_api_operations_t *cmds = stonith_api->cmds;
730
731 cmds->register_notification(stonith_api,
732 PCMK__VALUE_ST_NOTIFY_DISCONNECT,
733 tengine_stonith_connection_destroy);
734 cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
735 handle_fence_notification);
736 cmds->register_notification(stonith_api,
737 PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
738 tengine_stonith_history_synced);
739 te_trigger_stonith_history_sync(TRUE);
740 crm_notice("Fencer successfully connected");
741 }
742
743 return G_SOURCE_REMOVE;
744 }
745
746 void
747 controld_disconnect_fencer(bool destroy)
748 {
749 if (stonith_api) {
750
751 controld_clear_fsa_input_flags(R_ST_REQUIRED);
752
753 if (stonith_api->state != stonith_disconnected) {
754 stonith_api->cmds->disconnect(stonith_api);
755 }
756 stonith_api->cmds->remove_notification(stonith_api, NULL);
757 }
758 if (destroy) {
759 if (stonith_api) {
760 stonith_api->cmds->free(stonith_api);
761 stonith_api = NULL;
762 }
763 if (controld_fencer_connect_timer) {
764 mainloop_timer_del(controld_fencer_connect_timer);
765 controld_fencer_connect_timer = NULL;
766 }
767 if (te_client_id) {
768 free(te_client_id);
769 te_client_id = NULL;
770 }
771 }
772 }
773
774 static gboolean
775 do_stonith_history_sync(gpointer user_data)
776 {
777 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
778 stonith_history_t *history = NULL;
779
780 te_cleanup_stonith_history_sync(stonith_api, FALSE);
781 stonith_api->cmds->history(stonith_api,
782 st_opt_sync_call | st_opt_broadcast,
783 NULL, &history, 5);
784 stonith_history_free(history);
785 return TRUE;
786 } else {
787 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
788 return FALSE;
789 }
790 }
791
792 static void
793 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
794 {
795 char *uuid = NULL;
796 int stonith_id = -1;
797 int transition_id = -1;
798 pcmk__graph_action_t *action = NULL;
799 const char *target = NULL;
800
801 if ((data == NULL) || (data->userdata == NULL)) {
802 crm_err("Ignoring fence operation %d result: "
803 "No transition key given (bug?)",
804 ((data == NULL)? -1 : data->call_id));
805 return;
806 }
807
808 if (!AM_I_DC) {
809 const char *reason = stonith__exit_reason(data);
810
811 if (reason == NULL) {
812 reason = pcmk_exec_status_str(stonith__execution_status(data));
813 }
814 crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
815 data->call_id, stonith__exit_status(data), reason,
816 (const char *) data->userdata);
817 return;
818 }
819
820 CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
821 &stonith_id, NULL),
822 goto bail);
823
824 if (controld_globals.transition_graph->complete || (stonith_id < 0)
825 || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
826 || (controld_globals.transition_graph->id != transition_id)) {
827 crm_info("Ignoring fence operation %d result: "
828 "Not from current transition " CRM_XS
829 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
830 data->call_id,
831 pcmk__btoa(controld_globals.transition_graph->complete),
832 stonith_id, uuid, controld_globals.te_uuid, transition_id,
833 controld_globals.transition_graph->id);
834 goto bail;
835 }
836
837 action = controld_get_action(stonith_id);
838 if (action == NULL) {
839 crm_err("Ignoring fence operation %d result: "
840 "Action %d not found in transition graph (bug?) "
841 CRM_XS " uuid=%s transition=%d",
842 data->call_id, stonith_id, uuid, transition_id);
843 goto bail;
844 }
845
846 target = crm_element_value(action->xml, PCMK__META_ON_NODE);
847 if (target == NULL) {
848 crm_err("Ignoring fence operation %d result: No target given (bug?)",
849 data->call_id);
850 goto bail;
851 }
852
853 stop_te_timer(action);
854 if (stonith__exit_status(data) == CRM_EX_OK) {
855 const char *uuid = crm_element_value(action->xml,
856 PCMK__META_ON_NODE_UUID);
857 const char *op = crm_meta_value(action->params,
858 PCMK__META_STONITH_ACTION);
859
860 crm_info("Fence operation %d for %s succeeded", data->call_id, target);
861 if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
862 te_action_confirmed(action, NULL);
863 if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
864 const char *value = NULL;
865 char *now = pcmk__ttoa(time(NULL));
866 gboolean is_remote_node = FALSE;
867
868
869
870
871
872
873
874
875 if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
876 is_remote_node = TRUE;
877 }
878
879 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
880 is_remote_node);
881 free(now);
882
883 value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
884 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
885 is_remote_node);
886
887 value = crm_meta_value(action->params,
888 PCMK__META_DIGESTS_SECURE);
889 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
890 is_remote_node);
891
892 } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
893 send_stonith_update(action, target, uuid);
894 pcmk__set_graph_action_flags(action,
895 pcmk__graph_action_sent_update);
896 }
897 }
898 st_fail_count_reset(target);
899
900 } else {
901 enum pcmk__graph_next abort_action = pcmk__graph_restart;
902 int status = stonith__execution_status(data);
903 const char *reason = stonith__exit_reason(data);
904
905 if (reason == NULL) {
906 if (status == PCMK_EXEC_DONE) {
907 reason = "Agent returned error";
908 } else {
909 reason = pcmk_exec_status_str(status);
910 }
911 }
912 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
913
914
915
916
917 if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
918 crm_warn("Fence operation %d for %s failed: %s "
919 "(aborting transition and giving up for now)",
920 data->call_id, target, reason);
921 abort_action = pcmk__graph_wait;
922 } else {
923 crm_notice("Fence operation %d for %s failed: %s "
924 "(aborting transition)", data->call_id, target, reason);
925 }
926
927
928
929
930
931 st_fail_count_increment(target);
932 abort_for_stonith_failure(abort_action, target, NULL);
933 }
934
935 pcmk__update_graph(controld_globals.transition_graph, action);
936 trigger_graph();
937
938 bail:
939 free(data->userdata);
940 free(uuid);
941 return;
942 }
943
944 static int
945 fence_with_delay(const char *target, const char *type, int delay)
946 {
947 uint32_t options = st_opt_none;
948 int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
949 / 1000);
950
951 if (crmd_join_phase_count(crm_join_confirmed) == 1) {
952 stonith__set_call_options(options, target, st_opt_allow_self_fencing);
953 }
954 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
955 type, timeout_sec, 0, delay);
956 }
957
958
959
960
961
962
963
964
965
966
967 int
968 controld_execute_fence_action(pcmk__graph_t *graph,
969 pcmk__graph_action_t *action)
970 {
971 int rc = 0;
972 const char *id = pcmk__xe_id(action->xml);
973 const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
974 const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
975 const char *type = crm_meta_value(action->params,
976 PCMK__META_STONITH_ACTION);
977 char *transition_key = NULL;
978 const char *priority_delay = NULL;
979 int delay_i = 0;
980 gboolean invalid_action = FALSE;
981 int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
982 / 1000);
983
984 CRM_CHECK(id != NULL, invalid_action = TRUE);
985 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
986 CRM_CHECK(type != NULL, invalid_action = TRUE);
987 CRM_CHECK(target != NULL, invalid_action = TRUE);
988
989 if (invalid_action) {
990 crm_log_xml_warn(action->xml, "BadAction");
991 return EPROTO;
992 }
993
994 priority_delay = crm_meta_value(action->params,
995 PCMK_OPT_PRIORITY_FENCING_DELAY);
996
997 crm_notice("Requesting fencing (%s) targeting node %s "
998 CRM_XS " action=%s timeout=%i%s%s",
999 type, target, id, stonith_timeout,
1000 priority_delay ? " priority_delay=" : "",
1001 priority_delay ? priority_delay : "");
1002
1003
1004 controld_timer_fencer_connect(NULL);
1005
1006 pcmk__scan_min_int(priority_delay, &delay_i, 0);
1007 rc = fence_with_delay(target, type, delay_i);
1008 transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
1009 action->id, 0,
1010 controld_globals.te_uuid),
1011 stonith_api->cmds->register_callback(stonith_api, rc,
1012 (stonith_timeout
1013 + (delay_i > 0 ? delay_i : 0)),
1014 st_opt_timeout_updates, transition_key,
1015 "tengine_stonith_callback",
1016 tengine_stonith_callback);
1017 return pcmk_rc_ok;
1018 }
1019
1020 bool
1021 controld_verify_stonith_watchdog_timeout(const char *value)
1022 {
1023 long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
1024 const char *our_nodename = controld_globals.our_nodename;
1025
1026 if (st_timeout == 0
1027 || (stonith_api && (stonith_api->state != stonith_disconnected) &&
1028 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
1029 our_nodename))) {
1030 return pcmk__valid_stonith_watchdog_timeout(value);
1031 }
1032 return true;
1033 }
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045 static crm_trigger_t *stonith_history_sync_trigger = NULL;
1046 static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
1047 static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
1048
1049 void
1050 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
1051 {
1052 if (free_timers) {
1053 mainloop_timer_del(stonith_history_sync_timer_short);
1054 stonith_history_sync_timer_short = NULL;
1055 mainloop_timer_del(stonith_history_sync_timer_long);
1056 stonith_history_sync_timer_long = NULL;
1057 } else {
1058 mainloop_timer_stop(stonith_history_sync_timer_short);
1059 mainloop_timer_stop(stonith_history_sync_timer_long);
1060 }
1061
1062 if (st) {
1063 st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
1064 }
1065 }
1066
1067 static void
1068 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1069 {
1070 te_cleanup_stonith_history_sync(st, FALSE);
1071 crm_debug("Fence-history synced - cancel all timers");
1072 }
1073
1074 static gboolean
1075 stonith_history_sync_set_trigger(gpointer user_data)
1076 {
1077 mainloop_set_trigger(stonith_history_sync_trigger);
1078 return FALSE;
1079 }
1080
1081 void
1082 te_trigger_stonith_history_sync(bool long_timeout)
1083 {
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101 if (stonith_history_sync_trigger == NULL) {
1102 stonith_history_sync_trigger =
1103 mainloop_add_trigger(G_PRIORITY_LOW,
1104 do_stonith_history_sync, NULL);
1105 }
1106
1107 if (long_timeout) {
1108 if(stonith_history_sync_timer_long == NULL) {
1109 stonith_history_sync_timer_long =
1110 mainloop_timer_add("history_sync_long", 30000,
1111 FALSE, stonith_history_sync_set_trigger,
1112 NULL);
1113 }
1114 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1115 mainloop_timer_start(stonith_history_sync_timer_long);
1116 } else {
1117 if(stonith_history_sync_timer_short == NULL) {
1118 stonith_history_sync_timer_short =
1119 mainloop_timer_add("history_sync_short", 5000,
1120 FALSE, stonith_history_sync_set_trigger,
1121 NULL);
1122 }
1123 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1124 mainloop_timer_start(stonith_history_sync_timer_short);
1125 }
1126
1127 }
1128
1129