This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- controld_configure_fencing
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- update_node_state_after_fencing
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- controld_timer_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/common/xml.h>
13 #include <crm/stonith-ng.h>
14 #include <crm/fencing/internal.h>
15
16 #include <pacemaker-controld.h>
17
/* Forward declaration: the definition appears near the end of this file, but
 * the function is referenced earlier, when fencer notifications are registered
 * in controld_timer_fencer_connect().
 */
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
20
21
22
23
24
25
26
27
28
/* Per-target fencing failure record; stored as the value type of the
 * stonith_failures table
 */
struct st_fail_rec {
    int count;  // Failures counted for the target since the last reset
};

/* Initial value of stonith_max_attempts, also passed to pcmk_parse_score() as
 * the default when the configured value is parsed
 */
#define DEFAULT_STONITH_MAX_ATTEMPTS 10

// Whether to panic (true) or exit (false) when notified of our own fencing
static bool fence_reaction_panic = false;

// Failure count at or above which too_many_st_failures() reports a target
static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;

// Maps target name to struct st_fail_rec; created on the first counted failure
static GHashTable *stonith_failures = NULL;
38
39
40
41
42
43
44
45 static void
46 update_stonith_max_attempts(const char *value)
47 {
48 int score = 0;
49 int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);
50
51
52 CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);
53
54 if (stonith_max_attempts != score) {
55 crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
56 score, stonith_max_attempts);
57 }
58 stonith_max_attempts = score;
59 }
60
61
62
63
64
65
66
67 static void
68 set_fence_reaction(const char *reaction_s)
69 {
70 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
71 fence_reaction_panic = true;
72
73 } else {
74 if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
75 crm_warn("Invalid value '%s' for %s, using 'stop'",
76 reaction_s, PCMK_OPT_FENCE_REACTION);
77 }
78 fence_reaction_panic = false;
79 }
80 }
81
82
83
84
85
86
87
88 void
89 controld_configure_fencing(GHashTable *options)
90 {
91 const char *value = NULL;
92
93 value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
94 set_fence_reaction(value);
95
96 value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
97 update_stonith_max_attempts(value);
98 }
99
100 static gboolean
101 too_many_st_failures(const char *target)
102 {
103 GHashTableIter iter;
104 const char *key = NULL;
105 struct st_fail_rec *value = NULL;
106
107 if (stonith_failures == NULL) {
108 return FALSE;
109 }
110
111 if (target == NULL) {
112 g_hash_table_iter_init(&iter, stonith_failures);
113 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
114 (gpointer *) &value)) {
115
116 if (value->count >= stonith_max_attempts) {
117 target = (const char*)key;
118 goto too_many;
119 }
120 }
121 } else {
122 value = g_hash_table_lookup(stonith_failures, target);
123 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
124 goto too_many;
125 }
126 }
127 return FALSE;
128
129 too_many:
130 crm_warn("Too many failures (%d) to fence %s, giving up",
131 value->count, target);
132 return TRUE;
133 }
134
135
136
137
138
139
140
141 void
142 st_fail_count_reset(const char *target)
143 {
144 if (stonith_failures == NULL) {
145 return;
146 }
147
148 if (target) {
149 struct st_fail_rec *rec = NULL;
150
151 rec = g_hash_table_lookup(stonith_failures, target);
152 if (rec) {
153 rec->count = 0;
154 }
155 } else {
156 GHashTableIter iter;
157 const char *key = NULL;
158 struct st_fail_rec *rec = NULL;
159
160 g_hash_table_iter_init(&iter, stonith_failures);
161 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
162 (gpointer *) &rec)) {
163 rec->count = 0;
164 }
165 }
166 }
167
168 static void
169 st_fail_count_increment(const char *target)
170 {
171 struct st_fail_rec *rec = NULL;
172
173 if (stonith_failures == NULL) {
174 stonith_failures = pcmk__strkey_table(free, free);
175 }
176
177 rec = g_hash_table_lookup(stonith_failures, target);
178 if (rec) {
179 rec->count++;
180 } else {
181 rec = malloc(sizeof(struct st_fail_rec));
182 if(rec == NULL) {
183 return;
184 }
185
186 rec->count = 1;
187 g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
188 }
189 }
190
191
192
193
194 static void
195 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
196 void *user_data)
197 {
198 if (rc < pcmk_ok) {
199 crm_err("Fencing update %d for %s: failed - %s (%d)",
200 call_id, (char *)user_data, pcmk_strerror(rc), rc);
201 crm_log_xml_warn(msg, "Failed update");
202 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
203 "CIB update failed", NULL);
204
205 } else {
206 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
207 }
208 }
209
210
211
212
213
214
215
216
/*!
 * \brief Update a fenced node's state locally and in the CIB status section
 *
 * Marks the peer as down, submits a node state update for it to the CIB
 * (registering cib_fencing_updated() as the result callback), and then requests
 * deletion of the node's state sections.
 *
 * \param[in] target         Name of the fenced node
 * \param[in] target_xml_id  XML ID of the fenced node
 *
 * \note Nothing is done if either argument is NULL or if no peer entry can be
 *       obtained for the node.
 */
static void
update_node_state_after_fencing(const char *target, const char *target_xml_id)
{
    int rc = pcmk_ok;
    pcmk__node_status_t *peer = NULL;
    xmlNode *node_state = NULL;

    // By default, the update covers the join state and expected state
    uint32_t flags = controld_node_update_join|controld_node_update_expected;

    CRM_CHECK((target != NULL) && (target_xml_id != NULL), return);

    // Look the node up in any node cache
    peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any);
    CRM_CHECK(peer != NULL, return);

    if (peer->state == NULL) {
        // No state is recorded for this peer, so update the cluster field too
        flags |= controld_node_update_cluster;
    }

    if (peer->xml_id == NULL) {
        crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target);
        peer->xml_id = pcmk__str_copy(target_xml_id);
    }

    crmd_peer_down(peer, TRUE);

    // Build the update and make sure it carries the given XML ID
    node_state = create_node_state_update(peer, flags, NULL, __func__);
    crm_xml_add(node_state, PCMK_XA_ID, target_xml_id);

    if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
        // For peers flagged as remote, also record the current time as fenced
        char *now_s = pcmk__ttoa(time(NULL));

        crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
        free(now_s);
    }

    // rc is the CIB call ID on success (used below to register the callback)
    rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
                                                 PCMK_XE_STATUS, node_state,
                                                 cib_can_create);
    pcmk__xml_free(node_state);

    crm_debug("Updating node state for %s after fencing (call %d)", target, rc);

    // The callback receives a copy of the target name as its user data
    fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);

    controld_delete_node_state(peer->name, controld_section_all, cib_none);
}
272
273
274
275
276
277
278
279
280
281 static void
282 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
283 const char *target, const xmlNode *reason)
284 {
285
286
287
288 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
289 abort_action = pcmk__graph_wait;
290 }
291 abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
292 reason);
293 }
294
295
296
297
298
299
300
301
302
303
/* Names (heap-allocated copies) of fencing targets queued by
 * add_stonith_cleanup() until execute_stonith_cleanup() processes them or
 * purge_stonith_cleanup() discards them
 */
static GList *stonith_cleanup_list = NULL;
305
306
307
308
309
310
311
312 void
313 add_stonith_cleanup(const char *target) {
314 stonith_cleanup_list = g_list_append(stonith_cleanup_list,
315 pcmk__str_copy(target));
316 }
317
318
319
320
321
322
323
324 void
325 remove_stonith_cleanup(const char *target)
326 {
327 GList *iter = stonith_cleanup_list;
328
329 while (iter != NULL) {
330 GList *tmp = iter;
331 char *iter_name = tmp->data;
332
333 iter = iter->next;
334 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
335 crm_trace("Removing %s from the cleanup list", iter_name);
336 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
337 free(iter_name);
338 }
339 }
340 }
341
342
343
344
345
346 void
347 purge_stonith_cleanup(void)
348 {
349 if (stonith_cleanup_list) {
350 GList *iter = NULL;
351
352 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
353 char *target = iter->data;
354
355 crm_info("Purging %s from stonith cleanup list", target);
356 free(target);
357 }
358 g_list_free(stonith_cleanup_list);
359 stonith_cleanup_list = NULL;
360 }
361 }
362
363
364
365
366
367 void
368 execute_stonith_cleanup(void)
369 {
370 GList *iter;
371
372 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
373 char *target = iter->data;
374 pcmk__node_status_t *target_node =
375 pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
376 const char *uuid = pcmk__cluster_get_xml_id(target_node);
377
378 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
379 update_node_state_after_fencing(target, uuid);
380 free(target);
381 }
382 g_list_free(stonith_cleanup_list);
383 stonith_cleanup_list = NULL;
384 }
385
386
387
388
389
390
391
392
393
// Fencer API object; allocated on demand by controld_timer_fencer_connect()
static stonith_t *stonith_api = NULL;

// Timer used to retry connecting to the fencer from the main loop
static mainloop_timer_t *controld_fencer_connect_timer = NULL;

/* "<system name>.<pid>" string built on first use in
 * handle_fence_notification(), where it is compared against the client named in
 * fence notifications
 */
static char *te_client_id = NULL;
397
398 static gboolean
399 fail_incompletable_stonith(pcmk__graph_t *graph)
400 {
401 GList *lpc = NULL;
402 const char *task = NULL;
403 xmlNode *last_action = NULL;
404
405 if (graph == NULL) {
406 return FALSE;
407 }
408
409 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
410 GList *lpc2 = NULL;
411 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
412
413 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
414 continue;
415 }
416
417 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
418 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
419
420 if ((action->type != pcmk__cluster_graph_action)
421 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
422 continue;
423 }
424
425 task = crm_element_value(action->xml, PCMK_XA_OPERATION);
426 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
427 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
428 last_action = action->xml;
429 pcmk__update_graph(graph, action);
430 crm_notice("Failing action %d (%s): fencer terminated",
431 action->id, pcmk__xe_id(action->xml));
432 }
433 }
434 }
435
436 if (last_action != NULL) {
437 crm_warn("Fencer failure resulted in unrunnable actions");
438 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
439 return TRUE;
440 }
441
442 return FALSE;
443 }
444
445 static void
446 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
447 {
448 te_cleanup_stonith_history_sync(st, FALSE);
449
450 if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
451 crm_err("Lost fencer connection (will attempt to reconnect)");
452 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
453 mainloop_timer_start(controld_fencer_connect_timer);
454 }
455 } else {
456 crm_info("Disconnected from fencer");
457 }
458
459 if (stonith_api) {
460
461
462
463 if (stonith_api->state != stonith_disconnected) {
464 stonith_api->cmds->disconnect(st);
465 }
466 stonith_api->cmds->remove_notification(stonith_api, NULL);
467 }
468
469 if (AM_I_DC) {
470 fail_incompletable_stonith(controld_globals.transition_graph);
471 trigger_graph();
472 }
473 }
474
475
476
477
478
479
480
481
/*!
 * \brief Handle a notification from the fencer about a fencing result
 *
 * Logs the result, sends a fencing alert, maintains the local fencing failure
 * counts (when not DC), and for successful fencing of another node updates the
 * peer caches and, depending on our role, the node's state in the CIB.
 *
 * \param[in] st     Unused
 * \param[in] event  Fencing event that the notification describes
 */
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
    bool succeeded = true;
    const char *executioner = "the cluster";    // Used if event names none
    const char *client = "a client";            // Used if event names none
    const char *reason = NULL;
    int exec_status;

    // Build our own client ID once, for the comparison with the event's client
    if (te_client_id == NULL) {
        te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
                                         (unsigned long) getpid());
    }

    if (event == NULL) {
        crm_err("Notify data not found");
        return;
    }

    if (event->executioner != NULL) {
        executioner = event->executioner;
    }
    if (event->client_origin != NULL) {
        client = event->client_origin;
    }

    /* Anything other than an OK exit status with a "done" execution status is
     * a failure; a failure that reports "done" is shown as an error instead
     */
    exec_status = stonith__event_execution_status(event);
    if ((stonith__event_exit_status(event) != CRM_EX_OK)
        || (exec_status != PCMK_EXEC_DONE)) {
        succeeded = false;
        if (exec_status == PCMK_EXEC_DONE) {
            exec_status = PCMK_EXEC_ERROR;
        }
    }
    reason = stonith__event_exit_reason(event);

    crmd_alert_fencing_op(event);

    if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
        // Unfencing: the result is only logged, no state is updated here
        if (succeeded) {
            crm_notice("%s was unfenced by %s at the request of %s@%s",
                       event->target, executioner, client, event->origin);
        } else {
            crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
                    event->target, executioner,
                    pcmk_exec_status_str(exec_status),
                    ((reason == NULL)? "" : ": "),
                    ((reason == NULL)? "" : reason),
                    stonith__event_exit_status(event));
        }
        return;
    }

    if (succeeded && controld_is_local_node(event->target)) {
        /* The notification claims that the local node was fenced successfully:
         * panic or exit, depending on the configured fence reaction
         */
        crm_crit("We were allegedly just fenced by %s for %s!",
                 executioner, event->origin);
        if (fence_reaction_panic) {
            pcmk__panic("Notified of own fencing");
        } else {
            crm_exit(CRM_EX_FATAL);
        }
        return;
    }

    /* When we are not DC, keep the target's failure count current here
     * (tengine_stonith_callback() maintains the counts only when we are DC)
     */
    if (!AM_I_DC) {
        if (succeeded) {
            st_fail_count_reset(event->target);
        } else {
            st_fail_count_increment(event->target);
        }
    }

    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
               "%s%s%s%s " QB_XS " event=%s",
               event->target, (succeeded? "" : " not"),
               event->action, executioner, client, event->origin,
               (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
               ((reason == NULL)? "" : " ("),
               ((reason == NULL)? "" : reason),
               ((reason == NULL)? "" : ")"),
               event->id);

    if (succeeded) {
        const uint32_t flags = pcmk__node_search_any
                               |pcmk__node_search_cluster_cib;

        pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target,
                                                             NULL, flags);
        const char *uuid = NULL;

        // Without a cache entry for the target there is nothing to update
        if (peer == NULL) {
            return;
        }

        uuid = pcmk__cluster_get_xml_id(peer);

        if (AM_I_DC) {
            // As DC, record the fencing result in the CIB
            update_node_state_after_fencing(event->target, uuid);

            /* If the requesting client was not us, the current transition did
             * not initiate this fencing, so restart the transition
             */
            if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
                crm_info("External fencing operation from %s fenced %s",
                         client, event->target);
                abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                                 "External Fencing Operation", NULL);
            }

        } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
                                pcmk__str_null_matches|pcmk__str_casei)
                   && !pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
            /* The target is our DC, or we know of no DC (a NULL DC name counts
             * as a match), and the target is not flagged as a remote node
             */
            if (controld_globals.dc_name != NULL) {
                crm_notice("Fencing target %s was our DC", event->target);
            } else {
                crm_notice("Fencing target %s may have been our DC",
                           event->target);
            }

            /* If the local node executed the fencing, update the CIB now; in
             * any case, queue the target for execute_stonith_cleanup()
             */
            if (controld_is_local_node(event->executioner)) {
                update_node_state_after_fencing(event->target, uuid);
            }
            add_stonith_cleanup(event->target);
        }

        // For a target flagged as a remote node, also fail its connection
        if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
            remote_ra_fail(event->target);
        }

        crmd_peer_down(peer, TRUE);
    }
}
648
649
650
651
652
653
654
655
656
657
/*!
 * \brief Connect to the fencer, either blocking or via main loop retries
 *
 * \param[in] user_data  If NULL, retry connecting immediately for up to 30
 *                       attempts; otherwise try once and, on failure, leave
 *                       further attempts to a one-second repeating timer
 *
 * \return G_SOURCE_CONTINUE if the connection attempt failed in the
 *         non-blocking case while the fencer is still required, otherwise
 *         G_SOURCE_REMOVE
 */
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
    int rc = pcmk_ok;

    // Create the API object on first use
    if (stonith_api == NULL) {
        stonith_api = stonith__api_new();
        if (stonith_api == NULL) {
            crm_err("Could not connect to fencer: API memory allocation failed");
            return G_SOURCE_REMOVE;
        }
    }

    if (stonith_api->state != stonith_disconnected) {
        crm_trace("Already connected to fencer, no need to retry");
        return G_SOURCE_REMOVE;
    }

    if (user_data == NULL) {
        // Blocking case: retry right away, up to 30 attempts
        rc = stonith__api_connect_retry(stonith_api, crm_system_name, 30);
        if (rc != pcmk_rc_ok) {
            crm_err("Could not connect to fencer in 30 attempts: %s "
                    QB_XS " rc=%d", pcmk_rc_str(rc), rc);
        }
    } else {
        // Non-blocking case: a single attempt, with retries left to the timer
        rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);

        /* The timer calls this function again with non-NULL user data, so
         * retries stay in the non-blocking case
         */
        if (controld_fencer_connect_timer == NULL) {
            controld_fencer_connect_timer =
                mainloop_timer_add("controld_fencer_connect", 1000,
                                   TRUE, controld_timer_fencer_connect,
                                   GINT_TO_POINTER(TRUE));
        }

        if (rc != pcmk_ok) {
            if (pcmk_is_set(controld_globals.fsa_input_register,
                            R_ST_REQUIRED)) {
                crm_notice("Fencer connection failed (will retry): %s "
                           QB_XS " rc=%d", pcmk_strerror(rc), rc);

                if (!mainloop_timer_running(controld_fencer_connect_timer)) {
                    mainloop_timer_start(controld_fencer_connect_timer);
                }

                return G_SOURCE_CONTINUE;
            } else {
                crm_info("Fencer connection failed (ignoring because no longer required): %s "
                         QB_XS " rc=%d", pcmk_strerror(rc), rc);
            }
            return G_SOURCE_REMOVE;
        }
    }

    if (rc == pcmk_ok) {
        // Connected: register our notification handlers and schedule a sync
        stonith_api_operations_t *cmds = stonith_api->cmds;

        cmds->register_notification(stonith_api,
                                    PCMK__VALUE_ST_NOTIFY_DISCONNECT,
                                    tengine_stonith_connection_destroy);
        cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
                                    handle_fence_notification);
        cmds->register_notification(stonith_api,
                                    PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
                                    tengine_stonith_history_synced);
        te_trigger_stonith_history_sync(TRUE);
        crm_notice("Fencer successfully connected");
    }

    return G_SOURCE_REMOVE;
}
730
731 void
732 controld_disconnect_fencer(bool destroy)
733 {
734 if (stonith_api) {
735
736 controld_clear_fsa_input_flags(R_ST_REQUIRED);
737
738 if (stonith_api->state != stonith_disconnected) {
739 stonith_api->cmds->disconnect(stonith_api);
740 }
741 stonith_api->cmds->remove_notification(stonith_api, NULL);
742 }
743 if (destroy) {
744 if (stonith_api) {
745 stonith_api->cmds->free(stonith_api);
746 stonith_api = NULL;
747 }
748 if (controld_fencer_connect_timer) {
749 mainloop_timer_del(controld_fencer_connect_timer);
750 controld_fencer_connect_timer = NULL;
751 }
752 if (te_client_id) {
753 free(te_client_id);
754 te_client_id = NULL;
755 }
756 }
757 }
758
759 static gboolean
760 do_stonith_history_sync(gpointer user_data)
761 {
762 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
763 stonith_history_t *history = NULL;
764
765 te_cleanup_stonith_history_sync(stonith_api, FALSE);
766 stonith_api->cmds->history(stonith_api,
767 st_opt_sync_call | st_opt_broadcast,
768 NULL, &history, 5);
769 stonith__history_free(history);
770 return TRUE;
771 } else {
772 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
773 return FALSE;
774 }
775 }
776
/*!
 * \brief Callback for the result of a fencing operation that we requested
 *
 * When we are DC and the result belongs to an action of the current transition,
 * a success confirms the action and records the outcome, while a failure marks
 * the action failed, counts the failure, and aborts the transition. In both
 * cases the graph is then updated and triggered.
 *
 * \param[in]     stonith  Unused
 * \param[in,out] data     Result data; its userdata is expected to be the
 *                         transition key string given at registration
 *
 * \note data->userdata is freed at the "bail" label, which is reached only
 *       after the DC check; the two earlier returns leave it untouched.
 */
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
    char *uuid = NULL;
    int stonith_id = -1;
    int transition_id = -1;
    pcmk__graph_action_t *action = NULL;
    const char *target = NULL;

    if ((data == NULL) || (data->userdata == NULL)) {
        crm_err("Ignoring fence operation %d result: "
                "No transition key given (bug?)",
                ((data == NULL)? -1 : data->call_id));
        return;
    }

    if (!AM_I_DC) {
        // Not DC: the result is only logged
        const char *reason = stonith__exit_reason(data);

        if (reason == NULL) {
            reason = pcmk_exec_status_str(stonith__execution_status(data));
        }
        crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s",
                   data->call_id, stonith__exit_status(data), reason,
                   (const char *) data->userdata);
        return;
    }

    // Split the transition key into graph UUID, transition ID and action ID
    CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
                                    &stonith_id, NULL),
              goto bail);

    // Ignore results that do not belong to the transition currently in progress
    if (controld_globals.transition_graph->complete || (stonith_id < 0)
        || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
        || (controld_globals.transition_graph->id != transition_id)) {
        crm_info("Ignoring fence operation %d result: "
                 "Not from current transition " QB_XS
                 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
                 data->call_id,
                 pcmk__btoa(controld_globals.transition_graph->complete),
                 stonith_id, uuid, controld_globals.te_uuid, transition_id,
                 controld_globals.transition_graph->id);
        goto bail;
    }

    action = controld_get_action(stonith_id);
    if (action == NULL) {
        crm_err("Ignoring fence operation %d result: "
                "Action %d not found in transition graph (bug?) "
                QB_XS " uuid=%s transition=%d",
                data->call_id, stonith_id, uuid, transition_id);
        goto bail;
    }

    target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    if (target == NULL) {
        crm_err("Ignoring fence operation %d result: No target given (bug?)",
                data->call_id);
        goto bail;
    }

    stop_te_timer(action);
    if (stonith__exit_status(data) == CRM_EX_OK) {
        // NOTE: this uuid (the target's node UUID) shadows the outer uuid
        const char *uuid = crm_element_value(action->xml,
                                             PCMK__META_ON_NODE_UUID);
        const char *op = crm_meta_value(action->params,
                                        PCMK__META_STONITH_ACTION);

        crm_info("Fence operation %d for %s succeeded", data->call_id, target);
        if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
            te_action_confirmed(action, NULL);
            if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
                /* Unfencing: record the time of unfencing and the digests from
                 * the action's parameters as node attributes
                 */
                const char *value = NULL;
                char *now = pcmk__ttoa(time(NULL));
                gboolean is_remote_node = FALSE;

                // The target is treated as remote if found in the remote cache
                if (g_hash_table_lookup(pcmk__remote_peer_cache,
                                        uuid) != NULL) {
                    is_remote_node = TRUE;
                }

                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
                             is_remote_node);
                free(now);

                value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
                             is_remote_node);

                value = crm_meta_value(action->params,
                                       PCMK__META_DIGESTS_SECURE);
                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
                             is_remote_node);

            } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
                // Update the node state at most once per action
                update_node_state_after_fencing(target, uuid);
                pcmk__set_graph_action_flags(action,
                                             pcmk__graph_action_sent_update);
            }
        }
        st_fail_count_reset(target);

    } else {
        enum pcmk__graph_next abort_action = pcmk__graph_restart;
        int status = stonith__execution_status(data);
        const char *reason = stonith__exit_reason(data);

        // Without an explicit reason, derive one from the execution status
        if (reason == NULL) {
            if (status == PCMK_EXEC_DONE) {
                reason = "Agent returned error";
            } else {
                reason = pcmk_exec_status_str(status);
            }
        }
        pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);

        /* With no fence device available, wait after aborting instead of
         * restarting the transition
         */
        if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
            crm_warn("Fence operation %d for %s failed: %s "
                     "(aborting transition and giving up for now)",
                     data->call_id, target, reason);
            abort_action = pcmk__graph_wait;
        } else {
            crm_notice("Fence operation %d for %s failed: %s "
                       "(aborting transition)", data->call_id, target, reason);
        }

        /* Count the failure before aborting, so that the limit check made by
         * abort_for_stonith_failure() includes it
         */
        st_fail_count_increment(target);
        abort_for_stonith_failure(abort_action, target, NULL);
    }

    pcmk__update_graph(controld_globals.transition_graph, action);
    trigger_graph();

  bail:
    free(data->userdata);
    free(uuid);
    return;
}
929
930 static int
931 fence_with_delay(const char *target, const char *type, int delay)
932 {
933 uint32_t options = st_opt_none;
934 int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
935
936 if (crmd_join_phase_count(controld_join_confirmed) == 1) {
937 stonith__set_call_options(options, target, st_opt_allow_self_fencing);
938 }
939 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
940 type, timeout_sec, 0, delay);
941 }
942
943
944
945
946
947
948
949
950
951
952 int
953 controld_execute_fence_action(pcmk__graph_t *graph,
954 pcmk__graph_action_t *action)
955 {
956 int rc = 0;
957 const char *id = pcmk__xe_id(action->xml);
958 const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
959 const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
960 const char *type = crm_meta_value(action->params,
961 PCMK__META_STONITH_ACTION);
962 char *transition_key = NULL;
963 const char *priority_delay = NULL;
964 int delay_i = 0;
965 gboolean invalid_action = FALSE;
966 int stonith_timeout = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
967
968 CRM_CHECK(id != NULL, invalid_action = TRUE);
969 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
970 CRM_CHECK(type != NULL, invalid_action = TRUE);
971 CRM_CHECK(target != NULL, invalid_action = TRUE);
972
973 if (invalid_action) {
974 crm_log_xml_warn(action->xml, "BadAction");
975 return EPROTO;
976 }
977
978 priority_delay = crm_meta_value(action->params,
979 PCMK_OPT_PRIORITY_FENCING_DELAY);
980
981 crm_notice("Requesting fencing (%s) targeting node %s "
982 QB_XS " action=%s timeout=%i%s%s",
983 type, target, id, stonith_timeout,
984 priority_delay ? " priority_delay=" : "",
985 priority_delay ? priority_delay : "");
986
987
988 controld_timer_fencer_connect(NULL);
989
990 pcmk__scan_min_int(priority_delay, &delay_i, 0);
991 rc = fence_with_delay(target, type, delay_i);
992 transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
993 action->id, 0,
994 controld_globals.te_uuid),
995 stonith_api->cmds->register_callback(stonith_api, rc,
996 (stonith_timeout
997 + (delay_i > 0 ? delay_i : 0)),
998 st_opt_timeout_updates, transition_key,
999 "tengine_stonith_callback",
1000 tengine_stonith_callback);
1001 return pcmk_rc_ok;
1002 }
1003
1004 bool
1005 controld_verify_stonith_watchdog_timeout(const char *value)
1006 {
1007 long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
1008 const char *our_nodename = controld_globals.cluster->priv->node_name;
1009
1010 if (st_timeout == 0
1011 || (stonith_api && (stonith_api->state != stonith_disconnected) &&
1012 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
1013 our_nodename))) {
1014 return pcmk__valid_stonith_watchdog_timeout(value);
1015 }
1016 return true;
1017 }
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
// Main loop trigger that runs do_stonith_history_sync() when set
static crm_trigger_t *stonith_history_sync_trigger = NULL;

// 5-second timer that sets the trigger above when it fires
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;

// 30-second timer that sets the trigger above when it fires
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
1032
1033 void
1034 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
1035 {
1036 if (free_timers) {
1037 mainloop_timer_del(stonith_history_sync_timer_short);
1038 stonith_history_sync_timer_short = NULL;
1039 mainloop_timer_del(stonith_history_sync_timer_long);
1040 stonith_history_sync_timer_long = NULL;
1041 } else {
1042 mainloop_timer_stop(stonith_history_sync_timer_short);
1043 mainloop_timer_stop(stonith_history_sync_timer_long);
1044 }
1045
1046 if (st) {
1047 st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
1048 }
1049 }
1050
1051 static void
1052 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1053 {
1054 te_cleanup_stonith_history_sync(st, FALSE);
1055 crm_debug("Fence-history synced - cancel all timers");
1056 }
1057
1058 static gboolean
1059 stonith_history_sync_set_trigger(gpointer user_data)
1060 {
1061 mainloop_set_trigger(stonith_history_sync_trigger);
1062 return FALSE;
1063 }
1064
1065 void
1066 te_trigger_stonith_history_sync(bool long_timeout)
1067 {
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085 if (stonith_history_sync_trigger == NULL) {
1086 stonith_history_sync_trigger =
1087 mainloop_add_trigger(G_PRIORITY_LOW,
1088 do_stonith_history_sync, NULL);
1089 }
1090
1091 if (long_timeout) {
1092 if(stonith_history_sync_timer_long == NULL) {
1093 stonith_history_sync_timer_long =
1094 mainloop_timer_add("history_sync_long", 30000,
1095 FALSE, stonith_history_sync_set_trigger,
1096 NULL);
1097 }
1098 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1099 mainloop_timer_start(stonith_history_sync_timer_long);
1100 } else {
1101 if(stonith_history_sync_timer_short == NULL) {
1102 stonith_history_sync_timer_short =
1103 mainloop_timer_add("history_sync_short", 5000,
1104 FALSE, stonith_history_sync_set_trigger,
1105 NULL);
1106 }
1107 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1108 mainloop_timer_start(stonith_history_sync_timer_short);
1109 }
1110
1111 }
1112
1113