This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- controld_configure_fencing
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- send_stonith_update
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- controld_timer_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/common/xml.h>
13 #include <crm/stonith-ng.h>
14 #include <crm/fencing/internal.h>
15
16 #include <pacemaker-controld.h>
17
/* Forward declaration: this notification handler is registered by
 * controld_timer_fencer_connect() before its definition appears in the file.
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
20
21
22
23
24
25
26
27
28
/* Per-target fencing failure record, stored as a value in stonith_failures */
struct st_fail_rec {
    int count;      // Number of failed fencing attempts counted for the target
};

// Default score handed to pcmk_parse_score() when reading the attempt limit
#define DEFAULT_STONITH_MAX_ATTEMPTS 10

// If true, panic (rather than exit) when notified of our own fencing
static bool fence_reaction_panic = false;

// Failure count at which too_many_st_failures() reports a target as given up
static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;

// Table of target name -> struct st_fail_rec, created on first failure
static GHashTable *stonith_failures = NULL;
38
39
40
41
42
43
44
45 static void
46 update_stonith_max_attempts(const char *value)
47 {
48 int score = 0;
49 int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);
50
51
52 CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);
53
54 if (stonith_max_attempts != score) {
55 crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
56 score, stonith_max_attempts);
57 }
58 stonith_max_attempts = score;
59 }
60
61
62
63
64
65
66
67 static void
68 set_fence_reaction(const char *reaction_s)
69 {
70 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
71 fence_reaction_panic = true;
72
73 } else {
74 if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
75 crm_warn("Invalid value '%s' for %s, using 'stop'",
76 reaction_s, PCMK_OPT_FENCE_REACTION);
77 }
78 fence_reaction_panic = false;
79 }
80 }
81
82
83
84
85
86
87
88 void
89 controld_configure_fencing(GHashTable *options)
90 {
91 const char *value = NULL;
92
93 value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
94 set_fence_reaction(value);
95
96 value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
97 update_stonith_max_attempts(value);
98 }
99
100 static gboolean
101 too_many_st_failures(const char *target)
102 {
103 GHashTableIter iter;
104 const char *key = NULL;
105 struct st_fail_rec *value = NULL;
106
107 if (stonith_failures == NULL) {
108 return FALSE;
109 }
110
111 if (target == NULL) {
112 g_hash_table_iter_init(&iter, stonith_failures);
113 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
114 (gpointer *) &value)) {
115
116 if (value->count >= stonith_max_attempts) {
117 target = (const char*)key;
118 goto too_many;
119 }
120 }
121 } else {
122 value = g_hash_table_lookup(stonith_failures, target);
123 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
124 goto too_many;
125 }
126 }
127 return FALSE;
128
129 too_many:
130 crm_warn("Too many failures (%d) to fence %s, giving up",
131 value->count, target);
132 return TRUE;
133 }
134
135
136
137
138
139
140
141 void
142 st_fail_count_reset(const char *target)
143 {
144 if (stonith_failures == NULL) {
145 return;
146 }
147
148 if (target) {
149 struct st_fail_rec *rec = NULL;
150
151 rec = g_hash_table_lookup(stonith_failures, target);
152 if (rec) {
153 rec->count = 0;
154 }
155 } else {
156 GHashTableIter iter;
157 const char *key = NULL;
158 struct st_fail_rec *rec = NULL;
159
160 g_hash_table_iter_init(&iter, stonith_failures);
161 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
162 (gpointer *) &rec)) {
163 rec->count = 0;
164 }
165 }
166 }
167
168 static void
169 st_fail_count_increment(const char *target)
170 {
171 struct st_fail_rec *rec = NULL;
172
173 if (stonith_failures == NULL) {
174 stonith_failures = pcmk__strkey_table(free, free);
175 }
176
177 rec = g_hash_table_lookup(stonith_failures, target);
178 if (rec) {
179 rec->count++;
180 } else {
181 rec = malloc(sizeof(struct st_fail_rec));
182 if(rec == NULL) {
183 return;
184 }
185
186 rec->count = 1;
187 g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
188 }
189 }
190
191
192
193
194 static void
195 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
196 void *user_data)
197 {
198 if (rc < pcmk_ok) {
199 crm_err("Fencing update %d for %s: failed - %s (%d)",
200 call_id, (char *)user_data, pcmk_strerror(rc), rc);
201 crm_log_xml_warn(msg, "Failed update");
202 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
203 "CIB update failed", NULL);
204
205 } else {
206 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
207 }
208 }
209
210 static void
211 send_stonith_update(pcmk__graph_action_t *action, const char *target,
212 const char *uuid)
213 {
214 int rc = pcmk_ok;
215 pcmk__node_status_t *peer = NULL;
216
217
218
219
220
221 int flags = node_update_join | node_update_expected;
222
223
224 xmlNode *node_state = NULL;
225
226 CRM_CHECK(target != NULL, return);
227 CRM_CHECK(uuid != NULL, return);
228
229
230
231
232
233 peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
234
235 CRM_CHECK(peer != NULL, return);
236
237 if (peer->state == NULL) {
238
239
240
241
242 flags |= node_update_cluster;
243 }
244
245 if (peer->xml_id == NULL) {
246 crm_info("Recording XML ID '%s' for node '%s'", uuid, target);
247 peer->xml_id = pcmk__str_copy(uuid);
248 }
249
250 crmd_peer_down(peer, TRUE);
251
252
253 node_state = create_node_state_update(peer, flags, NULL, __func__);
254
255
256 if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
257 char *now_s = pcmk__ttoa(time(NULL));
258
259 crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
260 free(now_s);
261 }
262
263
264 crm_xml_add(node_state, PCMK_XA_ID, uuid);
265
266 rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
267 PCMK_XE_STATUS, node_state,
268 cib_can_create);
269
270
271 crm_debug("Sending fencing update %d for %s", rc, target);
272 fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
273
274
275
276
277
278
279 controld_delete_node_state(peer->name, controld_section_all, cib_none);
280 pcmk__xml_free(node_state);
281 return;
282 }
283
284
285
286
287
288
289
290
291
292 static void
293 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
294 const char *target, const xmlNode *reason)
295 {
296
297
298
299 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
300 abort_action = pcmk__graph_wait;
301 }
302 abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
303 reason);
304 }
305
306
307
308
309
310
311
312
313
314
/* List of target names (heap-allocated copies added by add_stonith_cleanup())
 * waiting to be processed by execute_stonith_cleanup()
 */
static GList *stonith_cleanup_list = NULL;
316
317
318
319
320
321
322
323 void
324 add_stonith_cleanup(const char *target) {
325 stonith_cleanup_list = g_list_append(stonith_cleanup_list,
326 pcmk__str_copy(target));
327 }
328
329
330
331
332
333
334
335 void
336 remove_stonith_cleanup(const char *target)
337 {
338 GList *iter = stonith_cleanup_list;
339
340 while (iter != NULL) {
341 GList *tmp = iter;
342 char *iter_name = tmp->data;
343
344 iter = iter->next;
345 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
346 crm_trace("Removing %s from the cleanup list", iter_name);
347 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
348 free(iter_name);
349 }
350 }
351 }
352
353
354
355
356
357 void
358 purge_stonith_cleanup(void)
359 {
360 if (stonith_cleanup_list) {
361 GList *iter = NULL;
362
363 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
364 char *target = iter->data;
365
366 crm_info("Purging %s from stonith cleanup list", target);
367 free(target);
368 }
369 g_list_free(stonith_cleanup_list);
370 stonith_cleanup_list = NULL;
371 }
372 }
373
374
375
376
377
378 void
379 execute_stonith_cleanup(void)
380 {
381 GList *iter;
382
383 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
384 char *target = iter->data;
385 pcmk__node_status_t *target_node =
386 pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
387 const char *uuid = pcmk__cluster_node_uuid(target_node);
388
389 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
390 send_stonith_update(NULL, target, uuid);
391 free(target);
392 }
393 g_list_free(stonith_cleanup_list);
394 stonith_cleanup_list = NULL;
395 }
396
397
398
399
400
401
402
403
404
/* Fencer API object: created on demand by controld_timer_fencer_connect() and
 * freed by controld_disconnect_fencer() when asked to destroy it
 */
static stonith_t *stonith_api = NULL;

// Repeating timer used to retry a failed non-blocking fencer connection
static mainloop_timer_t *controld_fencer_connect_timer = NULL;

/* "<crm_system_name>.<pid>" string, compared against the client of a fence
 * notification to detect fencing not requested by this controller
 */
static char *te_client_id = NULL;
408
409 static gboolean
410 fail_incompletable_stonith(pcmk__graph_t *graph)
411 {
412 GList *lpc = NULL;
413 const char *task = NULL;
414 xmlNode *last_action = NULL;
415
416 if (graph == NULL) {
417 return FALSE;
418 }
419
420 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
421 GList *lpc2 = NULL;
422 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
423
424 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
425 continue;
426 }
427
428 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
429 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
430
431 if ((action->type != pcmk__cluster_graph_action)
432 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
433 continue;
434 }
435
436 task = crm_element_value(action->xml, PCMK_XA_OPERATION);
437 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
438 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
439 last_action = action->xml;
440 pcmk__update_graph(graph, action);
441 crm_notice("Failing action %d (%s): fencer terminated",
442 action->id, pcmk__xe_id(action->xml));
443 }
444 }
445 }
446
447 if (last_action != NULL) {
448 crm_warn("Fencer failure resulted in unrunnable actions");
449 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
450 return TRUE;
451 }
452
453 return FALSE;
454 }
455
456 static void
457 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
458 {
459 te_cleanup_stonith_history_sync(st, FALSE);
460
461 if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
462 crm_err("Lost fencer connection (will attempt to reconnect)");
463 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
464 mainloop_timer_start(controld_fencer_connect_timer);
465 }
466 } else {
467 crm_info("Disconnected from fencer");
468 }
469
470 if (stonith_api) {
471
472
473
474 if (stonith_api->state != stonith_disconnected) {
475 stonith_api->cmds->disconnect(st);
476 }
477 stonith_api->cmds->remove_notification(stonith_api, NULL);
478 }
479
480 if (AM_I_DC) {
481 fail_incompletable_stonith(controld_globals.transition_graph);
482 trigger_graph();
483 }
484 }
485
486
487
488
489
490
491
492
493 static void
494 handle_fence_notification(stonith_t *st, stonith_event_t *event)
495 {
496 bool succeeded = true;
497 const char *executioner = "the cluster";
498 const char *client = "a client";
499 const char *reason = NULL;
500 int exec_status;
501
502 if (te_client_id == NULL) {
503 te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
504 (unsigned long) getpid());
505 }
506
507 if (event == NULL) {
508 crm_err("Notify data not found");
509 return;
510 }
511
512 if (event->executioner != NULL) {
513 executioner = event->executioner;
514 }
515 if (event->client_origin != NULL) {
516 client = event->client_origin;
517 }
518
519 exec_status = stonith__event_execution_status(event);
520 if ((stonith__event_exit_status(event) != CRM_EX_OK)
521 || (exec_status != PCMK_EXEC_DONE)) {
522 succeeded = false;
523 if (exec_status == PCMK_EXEC_DONE) {
524 exec_status = PCMK_EXEC_ERROR;
525 }
526 }
527 reason = stonith__event_exit_reason(event);
528
529 crmd_alert_fencing_op(event);
530
531 if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
532
533 if (succeeded) {
534 crm_notice("%s was unfenced by %s at the request of %s@%s",
535 event->target, executioner, client, event->origin);
536 } else {
537 crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
538 event->target, executioner,
539 pcmk_exec_status_str(exec_status),
540 ((reason == NULL)? "" : ": "),
541 ((reason == NULL)? "" : reason),
542 stonith__event_exit_status(event));
543 }
544 return;
545 }
546
547 if (succeeded && controld_is_local_node(event->target)) {
548
549
550
551
552
553
554
555
556
557 crm_crit("We were allegedly just fenced by %s for %s!",
558 executioner, event->origin);
559 if (fence_reaction_panic) {
560 pcmk__panic("Notified of own fencing");
561 } else {
562 crm_exit(CRM_EX_FATAL);
563 }
564 return;
565 }
566
567
568
569
570
571 if (!AM_I_DC) {
572 if (succeeded) {
573 st_fail_count_reset(event->target);
574 } else {
575 st_fail_count_increment(event->target);
576 }
577 }
578
579 crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
580 "%s%s%s%s " QB_XS " event=%s",
581 event->target, (succeeded? "" : " not"),
582 event->action, executioner, client, event->origin,
583 (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
584 ((reason == NULL)? "" : " ("),
585 ((reason == NULL)? "" : reason),
586 ((reason == NULL)? "" : ")"),
587 event->id);
588
589 if (succeeded) {
590 const uint32_t flags = pcmk__node_search_any
591 |pcmk__node_search_cluster_cib;
592
593 pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target,
594 flags);
595 const char *uuid = NULL;
596
597 if (peer == NULL) {
598 return;
599 }
600
601 uuid = pcmk__cluster_node_uuid(peer);
602
603 if (AM_I_DC) {
604
605 send_stonith_update(NULL, event->target, uuid);
606
607
608
609
610
611
612
613
614
615 if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
616
617
618
619 crm_info("External fencing operation from %s fenced %s",
620 client, event->target);
621 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
622 "External Fencing Operation", NULL);
623 }
624
625 } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
626 pcmk__str_null_matches|pcmk__str_casei)
627 && !pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
628
629
630 if (controld_globals.dc_name != NULL) {
631 crm_notice("Fencing target %s was our DC", event->target);
632 } else {
633 crm_notice("Fencing target %s may have been our DC",
634 event->target);
635 }
636
637
638
639
640
641 if (controld_is_local_node(event->executioner)) {
642 send_stonith_update(NULL, event->target, uuid);
643 }
644 add_stonith_cleanup(event->target);
645 }
646
647
648
649
650
651
652 if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
653 remote_ra_fail(event->target);
654 }
655
656 crmd_peer_down(peer, TRUE);
657 }
658 }
659
660
661
662
663
664
665
666
667
668
669 gboolean
670 controld_timer_fencer_connect(gpointer user_data)
671 {
672 int rc = pcmk_ok;
673
674 if (stonith_api == NULL) {
675 stonith_api = stonith_api_new();
676 if (stonith_api == NULL) {
677 crm_err("Could not connect to fencer: API memory allocation failed");
678 return G_SOURCE_REMOVE;
679 }
680 }
681
682 if (stonith_api->state != stonith_disconnected) {
683 crm_trace("Already connected to fencer, no need to retry");
684 return G_SOURCE_REMOVE;
685 }
686
687 if (user_data == NULL) {
688
689 rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
690 if (rc != pcmk_ok) {
691 crm_err("Could not connect to fencer in 30 attempts: %s "
692 QB_XS " rc=%d", pcmk_strerror(rc), rc);
693 }
694 } else {
695
696 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
697
698 if (controld_fencer_connect_timer == NULL) {
699 controld_fencer_connect_timer =
700 mainloop_timer_add("controld_fencer_connect", 1000,
701 TRUE, controld_timer_fencer_connect,
702 GINT_TO_POINTER(TRUE));
703 }
704
705 if (rc != pcmk_ok) {
706 if (pcmk_is_set(controld_globals.fsa_input_register,
707 R_ST_REQUIRED)) {
708 crm_notice("Fencer connection failed (will retry): %s "
709 QB_XS " rc=%d", pcmk_strerror(rc), rc);
710
711 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
712 mainloop_timer_start(controld_fencer_connect_timer);
713 }
714
715 return G_SOURCE_CONTINUE;
716 } else {
717 crm_info("Fencer connection failed (ignoring because no longer required): %s "
718 QB_XS " rc=%d", pcmk_strerror(rc), rc);
719 }
720 return G_SOURCE_REMOVE;
721 }
722 }
723
724 if (rc == pcmk_ok) {
725 stonith_api_operations_t *cmds = stonith_api->cmds;
726
727 cmds->register_notification(stonith_api,
728 PCMK__VALUE_ST_NOTIFY_DISCONNECT,
729 tengine_stonith_connection_destroy);
730 cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
731 handle_fence_notification);
732 cmds->register_notification(stonith_api,
733 PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
734 tengine_stonith_history_synced);
735 te_trigger_stonith_history_sync(TRUE);
736 crm_notice("Fencer successfully connected");
737 }
738
739 return G_SOURCE_REMOVE;
740 }
741
742 void
743 controld_disconnect_fencer(bool destroy)
744 {
745 if (stonith_api) {
746
747 controld_clear_fsa_input_flags(R_ST_REQUIRED);
748
749 if (stonith_api->state != stonith_disconnected) {
750 stonith_api->cmds->disconnect(stonith_api);
751 }
752 stonith_api->cmds->remove_notification(stonith_api, NULL);
753 }
754 if (destroy) {
755 if (stonith_api) {
756 stonith_api->cmds->free(stonith_api);
757 stonith_api = NULL;
758 }
759 if (controld_fencer_connect_timer) {
760 mainloop_timer_del(controld_fencer_connect_timer);
761 controld_fencer_connect_timer = NULL;
762 }
763 if (te_client_id) {
764 free(te_client_id);
765 te_client_id = NULL;
766 }
767 }
768 }
769
770 static gboolean
771 do_stonith_history_sync(gpointer user_data)
772 {
773 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
774 stonith_history_t *history = NULL;
775
776 te_cleanup_stonith_history_sync(stonith_api, FALSE);
777 stonith_api->cmds->history(stonith_api,
778 st_opt_sync_call | st_opt_broadcast,
779 NULL, &history, 5);
780 stonith_history_free(history);
781 return TRUE;
782 } else {
783 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
784 return FALSE;
785 }
786 }
787
788 static void
789 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
790 {
791 char *uuid = NULL;
792 int stonith_id = -1;
793 int transition_id = -1;
794 pcmk__graph_action_t *action = NULL;
795 const char *target = NULL;
796
797 if ((data == NULL) || (data->userdata == NULL)) {
798 crm_err("Ignoring fence operation %d result: "
799 "No transition key given (bug?)",
800 ((data == NULL)? -1 : data->call_id));
801 return;
802 }
803
804 if (!AM_I_DC) {
805 const char *reason = stonith__exit_reason(data);
806
807 if (reason == NULL) {
808 reason = pcmk_exec_status_str(stonith__execution_status(data));
809 }
810 crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s",
811 data->call_id, stonith__exit_status(data), reason,
812 (const char *) data->userdata);
813 return;
814 }
815
816 CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
817 &stonith_id, NULL),
818 goto bail);
819
820 if (controld_globals.transition_graph->complete || (stonith_id < 0)
821 || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
822 || (controld_globals.transition_graph->id != transition_id)) {
823 crm_info("Ignoring fence operation %d result: "
824 "Not from current transition " QB_XS
825 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
826 data->call_id,
827 pcmk__btoa(controld_globals.transition_graph->complete),
828 stonith_id, uuid, controld_globals.te_uuid, transition_id,
829 controld_globals.transition_graph->id);
830 goto bail;
831 }
832
833 action = controld_get_action(stonith_id);
834 if (action == NULL) {
835 crm_err("Ignoring fence operation %d result: "
836 "Action %d not found in transition graph (bug?) "
837 QB_XS " uuid=%s transition=%d",
838 data->call_id, stonith_id, uuid, transition_id);
839 goto bail;
840 }
841
842 target = crm_element_value(action->xml, PCMK__META_ON_NODE);
843 if (target == NULL) {
844 crm_err("Ignoring fence operation %d result: No target given (bug?)",
845 data->call_id);
846 goto bail;
847 }
848
849 stop_te_timer(action);
850 if (stonith__exit_status(data) == CRM_EX_OK) {
851 const char *uuid = crm_element_value(action->xml,
852 PCMK__META_ON_NODE_UUID);
853 const char *op = crm_meta_value(action->params,
854 PCMK__META_STONITH_ACTION);
855
856 crm_info("Fence operation %d for %s succeeded", data->call_id, target);
857 if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
858 te_action_confirmed(action, NULL);
859 if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
860 const char *value = NULL;
861 char *now = pcmk__ttoa(time(NULL));
862 gboolean is_remote_node = FALSE;
863
864
865
866
867
868
869
870
871 if (g_hash_table_lookup(pcmk__remote_peer_cache,
872 uuid) != NULL) {
873 is_remote_node = TRUE;
874 }
875
876 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
877 is_remote_node);
878 free(now);
879
880 value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
881 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
882 is_remote_node);
883
884 value = crm_meta_value(action->params,
885 PCMK__META_DIGESTS_SECURE);
886 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
887 is_remote_node);
888
889 } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
890 send_stonith_update(action, target, uuid);
891 pcmk__set_graph_action_flags(action,
892 pcmk__graph_action_sent_update);
893 }
894 }
895 st_fail_count_reset(target);
896
897 } else {
898 enum pcmk__graph_next abort_action = pcmk__graph_restart;
899 int status = stonith__execution_status(data);
900 const char *reason = stonith__exit_reason(data);
901
902 if (reason == NULL) {
903 if (status == PCMK_EXEC_DONE) {
904 reason = "Agent returned error";
905 } else {
906 reason = pcmk_exec_status_str(status);
907 }
908 }
909 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
910
911
912
913
914 if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
915 crm_warn("Fence operation %d for %s failed: %s "
916 "(aborting transition and giving up for now)",
917 data->call_id, target, reason);
918 abort_action = pcmk__graph_wait;
919 } else {
920 crm_notice("Fence operation %d for %s failed: %s "
921 "(aborting transition)", data->call_id, target, reason);
922 }
923
924
925
926
927
928 st_fail_count_increment(target);
929 abort_for_stonith_failure(abort_action, target, NULL);
930 }
931
932 pcmk__update_graph(controld_globals.transition_graph, action);
933 trigger_graph();
934
935 bail:
936 free(data->userdata);
937 free(uuid);
938 return;
939 }
940
941 static int
942 fence_with_delay(const char *target, const char *type, int delay)
943 {
944 uint32_t options = st_opt_none;
945 int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
946
947 if (crmd_join_phase_count(controld_join_confirmed) == 1) {
948 stonith__set_call_options(options, target, st_opt_allow_self_fencing);
949 }
950 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
951 type, timeout_sec, 0, delay);
952 }
953
954
955
956
957
958
959
960
961
962
963 int
964 controld_execute_fence_action(pcmk__graph_t *graph,
965 pcmk__graph_action_t *action)
966 {
967 int rc = 0;
968 const char *id = pcmk__xe_id(action->xml);
969 const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
970 const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
971 const char *type = crm_meta_value(action->params,
972 PCMK__META_STONITH_ACTION);
973 char *transition_key = NULL;
974 const char *priority_delay = NULL;
975 int delay_i = 0;
976 gboolean invalid_action = FALSE;
977 int stonith_timeout = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
978
979 CRM_CHECK(id != NULL, invalid_action = TRUE);
980 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
981 CRM_CHECK(type != NULL, invalid_action = TRUE);
982 CRM_CHECK(target != NULL, invalid_action = TRUE);
983
984 if (invalid_action) {
985 crm_log_xml_warn(action->xml, "BadAction");
986 return EPROTO;
987 }
988
989 priority_delay = crm_meta_value(action->params,
990 PCMK_OPT_PRIORITY_FENCING_DELAY);
991
992 crm_notice("Requesting fencing (%s) targeting node %s "
993 QB_XS " action=%s timeout=%i%s%s",
994 type, target, id, stonith_timeout,
995 priority_delay ? " priority_delay=" : "",
996 priority_delay ? priority_delay : "");
997
998
999 controld_timer_fencer_connect(NULL);
1000
1001 pcmk__scan_min_int(priority_delay, &delay_i, 0);
1002 rc = fence_with_delay(target, type, delay_i);
1003 transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
1004 action->id, 0,
1005 controld_globals.te_uuid),
1006 stonith_api->cmds->register_callback(stonith_api, rc,
1007 (stonith_timeout
1008 + (delay_i > 0 ? delay_i : 0)),
1009 st_opt_timeout_updates, transition_key,
1010 "tengine_stonith_callback",
1011 tengine_stonith_callback);
1012 return pcmk_rc_ok;
1013 }
1014
1015 bool
1016 controld_verify_stonith_watchdog_timeout(const char *value)
1017 {
1018 long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
1019 const char *our_nodename = controld_globals.cluster->priv->node_name;
1020
1021 if (st_timeout == 0
1022 || (stonith_api && (stonith_api->state != stonith_disconnected) &&
1023 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
1024 our_nodename))) {
1025 return pcmk__valid_stonith_watchdog_timeout(value);
1026 }
1027 return true;
1028 }
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
// Mainloop trigger that runs do_stonith_history_sync() when set
static crm_trigger_t *stonith_history_sync_trigger = NULL;

// Timer (5000ms) that sets the trigger for a short-delay history sync
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;

// Timer (30000ms) that sets the trigger for a long-delay history sync
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
1043
1044 void
1045 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
1046 {
1047 if (free_timers) {
1048 mainloop_timer_del(stonith_history_sync_timer_short);
1049 stonith_history_sync_timer_short = NULL;
1050 mainloop_timer_del(stonith_history_sync_timer_long);
1051 stonith_history_sync_timer_long = NULL;
1052 } else {
1053 mainloop_timer_stop(stonith_history_sync_timer_short);
1054 mainloop_timer_stop(stonith_history_sync_timer_long);
1055 }
1056
1057 if (st) {
1058 st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
1059 }
1060 }
1061
1062 static void
1063 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1064 {
1065 te_cleanup_stonith_history_sync(st, FALSE);
1066 crm_debug("Fence-history synced - cancel all timers");
1067 }
1068
1069 static gboolean
1070 stonith_history_sync_set_trigger(gpointer user_data)
1071 {
1072 mainloop_set_trigger(stonith_history_sync_trigger);
1073 return FALSE;
1074 }
1075
1076 void
1077 te_trigger_stonith_history_sync(bool long_timeout)
1078 {
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096 if (stonith_history_sync_trigger == NULL) {
1097 stonith_history_sync_trigger =
1098 mainloop_add_trigger(G_PRIORITY_LOW,
1099 do_stonith_history_sync, NULL);
1100 }
1101
1102 if (long_timeout) {
1103 if(stonith_history_sync_timer_long == NULL) {
1104 stonith_history_sync_timer_long =
1105 mainloop_timer_add("history_sync_long", 30000,
1106 FALSE, stonith_history_sync_set_trigger,
1107 NULL);
1108 }
1109 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1110 mainloop_timer_start(stonith_history_sync_timer_long);
1111 } else {
1112 if(stonith_history_sync_timer_short == NULL) {
1113 stonith_history_sync_timer_short =
1114 mainloop_timer_add("history_sync_short", 5000,
1115 FALSE, stonith_history_sync_set_trigger,
1116 NULL);
1117 }
1118 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1119 mainloop_timer_start(stonith_history_sync_timer_short);
1120 }
1121
1122 }
1123
1124