This source file includes the following definitions.
- update_stonith_max_attempts
- set_fence_reaction
- controld_configure_fencing
- too_many_st_failures
- st_fail_count_reset
- st_fail_count_increment
- cib_fencing_updated
- send_stonith_update
- abort_for_stonith_failure
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- handle_fence_notification
- controld_timer_fencer_connect
- controld_disconnect_fencer
- do_stonith_history_sync
- tengine_stonith_callback
- fence_with_delay
- controld_execute_fence_action
- controld_verify_stonith_watchdog_timeout
- te_cleanup_stonith_history_sync
- tengine_stonith_history_synced
- stonith_history_sync_set_trigger
- te_trigger_stonith_history_sync
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/common/xml.h>
13 #include <crm/stonith-ng.h>
14 #include <crm/fencing/internal.h>
15
16 #include <pacemaker-controld.h>
17
18 static void
19 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
20
21
22
23
24
25
26
27
28
/* Fencing failure record kept per target as a value in stonith_failures. */
struct st_fail_rec {
    /* Failure tally: set to 1 when the record is created, incremented on each
     * further failure, and zeroed by st_fail_count_reset().
     */
    int count;
};
32
33 static bool fence_reaction_panic = false;
34 static unsigned long int stonith_max_attempts = 10;
35 static GHashTable *stonith_failures = NULL;
36
37
38
39
40
41
42
43 static void
44 update_stonith_max_attempts(const char *value)
45 {
46 stonith_max_attempts = char2score(value);
47 if (stonith_max_attempts < 1UL) {
48 stonith_max_attempts = 10UL;
49 }
50 }
51
52
53
54
55
56
57
58 static void
59 set_fence_reaction(const char *reaction_s)
60 {
61 if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
62 fence_reaction_panic = true;
63
64 } else {
65 if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
66 crm_warn("Invalid value '%s' for %s, using 'stop'",
67 reaction_s, PCMK_OPT_FENCE_REACTION);
68 }
69 fence_reaction_panic = false;
70 }
71 }
72
73
74
75
76
77
78
79 void
80 controld_configure_fencing(GHashTable *options)
81 {
82 const char *value = NULL;
83
84 value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
85 set_fence_reaction(value);
86
87 value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
88 update_stonith_max_attempts(value);
89 }
90
91 static gboolean
92 too_many_st_failures(const char *target)
93 {
94 GHashTableIter iter;
95 const char *key = NULL;
96 struct st_fail_rec *value = NULL;
97
98 if (stonith_failures == NULL) {
99 return FALSE;
100 }
101
102 if (target == NULL) {
103 g_hash_table_iter_init(&iter, stonith_failures);
104 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
105 (gpointer *) &value)) {
106
107 if (value->count >= stonith_max_attempts) {
108 target = (const char*)key;
109 goto too_many;
110 }
111 }
112 } else {
113 value = g_hash_table_lookup(stonith_failures, target);
114 if ((value != NULL) && (value->count >= stonith_max_attempts)) {
115 goto too_many;
116 }
117 }
118 return FALSE;
119
120 too_many:
121 crm_warn("Too many failures (%d) to fence %s, giving up",
122 value->count, target);
123 return TRUE;
124 }
125
126
127
128
129
130
131
132 void
133 st_fail_count_reset(const char *target)
134 {
135 if (stonith_failures == NULL) {
136 return;
137 }
138
139 if (target) {
140 struct st_fail_rec *rec = NULL;
141
142 rec = g_hash_table_lookup(stonith_failures, target);
143 if (rec) {
144 rec->count = 0;
145 }
146 } else {
147 GHashTableIter iter;
148 const char *key = NULL;
149 struct st_fail_rec *rec = NULL;
150
151 g_hash_table_iter_init(&iter, stonith_failures);
152 while (g_hash_table_iter_next(&iter, (gpointer *) &key,
153 (gpointer *) &rec)) {
154 rec->count = 0;
155 }
156 }
157 }
158
159 static void
160 st_fail_count_increment(const char *target)
161 {
162 struct st_fail_rec *rec = NULL;
163
164 if (stonith_failures == NULL) {
165 stonith_failures = pcmk__strkey_table(free, free);
166 }
167
168 rec = g_hash_table_lookup(stonith_failures, target);
169 if (rec) {
170 rec->count++;
171 } else {
172 rec = malloc(sizeof(struct st_fail_rec));
173 if(rec == NULL) {
174 return;
175 }
176
177 rec->count = 1;
178 g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
179 }
180 }
181
182
183
184
185 static void
186 cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
187 void *user_data)
188 {
189 if (rc < pcmk_ok) {
190 crm_err("Fencing update %d for %s: failed - %s (%d)",
191 call_id, (char *)user_data, pcmk_strerror(rc), rc);
192 crm_log_xml_warn(msg, "Failed update");
193 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
194 "CIB update failed", NULL);
195
196 } else {
197 crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
198 }
199 }
200
201 static void
202 send_stonith_update(pcmk__graph_action_t *action, const char *target,
203 const char *uuid)
204 {
205 int rc = pcmk_ok;
206 crm_node_t *peer = NULL;
207
208
209
210
211
212 int flags = node_update_join | node_update_expected;
213
214
215 xmlNode *node_state = NULL;
216
217 CRM_CHECK(target != NULL, return);
218 CRM_CHECK(uuid != NULL, return);
219
220
221
222
223
224 peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
225
226 CRM_CHECK(peer != NULL, return);
227
228 if (peer->state == NULL) {
229
230
231
232
233 flags |= node_update_cluster;
234 }
235
236 if (peer->uuid == NULL) {
237 crm_info("Recording uuid '%s' for node '%s'", uuid, target);
238 peer->uuid = pcmk__str_copy(uuid);
239 }
240
241 crmd_peer_down(peer, TRUE);
242
243
244 node_state = create_node_state_update(peer, flags, NULL, __func__);
245
246
247 if (peer->flags & crm_remote_node) {
248 char *now_s = pcmk__ttoa(time(NULL));
249
250 crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
251 free(now_s);
252 }
253
254
255 crm_xml_add(node_state, PCMK_XA_ID, uuid);
256
257 rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
258 PCMK_XE_STATUS, node_state,
259 cib_scope_local
260 |cib_can_create);
261
262
263 crm_debug("Sending fencing update %d for %s", rc, target);
264 fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
265
266
267
268
269
270
271 controld_delete_node_state(peer->uname, controld_section_all,
272 cib_scope_local);
273 free_xml(node_state);
274 return;
275 }
276
277
278
279
280
281
282
283
284
285 static void
286 abort_for_stonith_failure(enum pcmk__graph_next abort_action,
287 const char *target, const xmlNode *reason)
288 {
289
290
291
292 if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
293 abort_action = pcmk__graph_wait;
294 }
295 abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
296 reason);
297 }
298
299
300
301
302
303
304
305
306
307
308 static GList *stonith_cleanup_list = NULL;
309
310
311
312
313
314
315
316 void
317 add_stonith_cleanup(const char *target) {
318 stonith_cleanup_list = g_list_append(stonith_cleanup_list,
319 pcmk__str_copy(target));
320 }
321
322
323
324
325
326
327
328 void
329 remove_stonith_cleanup(const char *target)
330 {
331 GList *iter = stonith_cleanup_list;
332
333 while (iter != NULL) {
334 GList *tmp = iter;
335 char *iter_name = tmp->data;
336
337 iter = iter->next;
338 if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
339 crm_trace("Removing %s from the cleanup list", iter_name);
340 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
341 free(iter_name);
342 }
343 }
344 }
345
346
347
348
349
350 void
351 purge_stonith_cleanup(void)
352 {
353 if (stonith_cleanup_list) {
354 GList *iter = NULL;
355
356 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
357 char *target = iter->data;
358
359 crm_info("Purging %s from stonith cleanup list", target);
360 free(target);
361 }
362 g_list_free(stonith_cleanup_list);
363 stonith_cleanup_list = NULL;
364 }
365 }
366
367
368
369
370
371 void
372 execute_stonith_cleanup(void)
373 {
374 GList *iter;
375
376 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
377 char *target = iter->data;
378 crm_node_t *target_node =
379 pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
380 const char *uuid = pcmk__cluster_node_uuid(target_node);
381
382 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
383 send_stonith_update(NULL, target, uuid);
384 free(target);
385 }
386 g_list_free(stonith_cleanup_list);
387 stonith_cleanup_list = NULL;
388 }
389
390
391
392
393
394
395
396
397
398 static stonith_t *stonith_api = NULL;
399 static mainloop_timer_t *controld_fencer_connect_timer = NULL;
400 static char *te_client_id = NULL;
401
402 static gboolean
403 fail_incompletable_stonith(pcmk__graph_t *graph)
404 {
405 GList *lpc = NULL;
406 const char *task = NULL;
407 xmlNode *last_action = NULL;
408
409 if (graph == NULL) {
410 return FALSE;
411 }
412
413 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
414 GList *lpc2 = NULL;
415 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
416
417 if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
418 continue;
419 }
420
421 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
422 pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
423
424 if ((action->type != pcmk__cluster_graph_action)
425 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
426 continue;
427 }
428
429 task = crm_element_value(action->xml, PCMK_XA_OPERATION);
430 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
431 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
432 last_action = action->xml;
433 pcmk__update_graph(graph, action);
434 crm_notice("Failing action %d (%s): fencer terminated",
435 action->id, pcmk__xe_id(action->xml));
436 }
437 }
438 }
439
440 if (last_action != NULL) {
441 crm_warn("Fencer failure resulted in unrunnable actions");
442 abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
443 return TRUE;
444 }
445
446 return FALSE;
447 }
448
449 static void
450 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
451 {
452 te_cleanup_stonith_history_sync(st, FALSE);
453
454 if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
455 crm_err("Lost fencer connection (will attempt to reconnect)");
456 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
457 mainloop_timer_start(controld_fencer_connect_timer);
458 }
459 } else {
460 crm_info("Disconnected from fencer");
461 }
462
463 if (stonith_api) {
464
465
466
467 if (stonith_api->state != stonith_disconnected) {
468 stonith_api->cmds->disconnect(st);
469 }
470 stonith_api->cmds->remove_notification(stonith_api, NULL);
471 }
472
473 if (AM_I_DC) {
474 fail_incompletable_stonith(controld_globals.transition_graph);
475 trigger_graph();
476 }
477 }
478
479
480
481
482
483
484
485
486 static void
487 handle_fence_notification(stonith_t *st, stonith_event_t *event)
488 {
489 bool succeeded = true;
490 const char *executioner = "the cluster";
491 const char *client = "a client";
492 const char *reason = NULL;
493 int exec_status;
494
495 if (te_client_id == NULL) {
496 te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
497 (unsigned long) getpid());
498 }
499
500 if (event == NULL) {
501 crm_err("Notify data not found");
502 return;
503 }
504
505 if (event->executioner != NULL) {
506 executioner = event->executioner;
507 }
508 if (event->client_origin != NULL) {
509 client = event->client_origin;
510 }
511
512 exec_status = stonith__event_execution_status(event);
513 if ((stonith__event_exit_status(event) != CRM_EX_OK)
514 || (exec_status != PCMK_EXEC_DONE)) {
515 succeeded = false;
516 if (exec_status == PCMK_EXEC_DONE) {
517 exec_status = PCMK_EXEC_ERROR;
518 }
519 }
520 reason = stonith__event_exit_reason(event);
521
522 crmd_alert_fencing_op(event);
523
524 if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
525
526 if (succeeded) {
527 crm_notice("%s was unfenced by %s at the request of %s@%s",
528 event->target, executioner, client, event->origin);
529 } else {
530 crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
531 event->target, executioner,
532 pcmk_exec_status_str(exec_status),
533 ((reason == NULL)? "" : ": "),
534 ((reason == NULL)? "" : reason),
535 stonith__event_exit_status(event));
536 }
537 return;
538 }
539
540 if (succeeded
541 && pcmk__str_eq(event->target, controld_globals.our_nodename,
542 pcmk__str_casei)) {
543
544
545
546
547
548
549
550
551
552 crm_crit("We were allegedly just fenced by %s for %s!",
553 executioner, event->origin);
554 if (fence_reaction_panic) {
555 pcmk__panic(__func__);
556 } else {
557 crm_exit(CRM_EX_FATAL);
558 }
559 return;
560 }
561
562
563
564
565
566 if (!AM_I_DC) {
567 if (succeeded) {
568 st_fail_count_reset(event->target);
569 } else {
570 st_fail_count_increment(event->target);
571 }
572 }
573
574 crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
575 "%s%s%s%s " CRM_XS " event=%s",
576 event->target, (succeeded? "" : " not"),
577 event->action, executioner, client, event->origin,
578 (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
579 ((reason == NULL)? "" : " ("),
580 ((reason == NULL)? "" : reason),
581 ((reason == NULL)? "" : ")"),
582 event->id);
583
584 if (succeeded) {
585 const uint32_t flags = pcmk__node_search_any
586 |pcmk__node_search_cluster_cib;
587
588 crm_node_t *peer = pcmk__search_node_caches(0, event->target, flags);
589 const char *uuid = NULL;
590
591 if (peer == NULL) {
592 return;
593 }
594
595 uuid = pcmk__cluster_node_uuid(peer);
596
597 if (AM_I_DC) {
598
599 send_stonith_update(NULL, event->target, uuid);
600
601
602
603
604
605
606
607
608
609 if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
610
611
612
613 crm_info("External fencing operation from %s fenced %s",
614 client, event->target);
615 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
616 "External Fencing Operation", NULL);
617 }
618
619 } else if (pcmk__str_eq(controld_globals.dc_name, event->target,
620 pcmk__str_null_matches|pcmk__str_casei)
621 && !pcmk_is_set(peer->flags, crm_remote_node)) {
622
623
624 if (controld_globals.dc_name != NULL) {
625 crm_notice("Fencing target %s was our DC", event->target);
626 } else {
627 crm_notice("Fencing target %s may have been our DC",
628 event->target);
629 }
630
631
632
633
634
635 if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
636 pcmk__str_casei)) {
637 send_stonith_update(NULL, event->target, uuid);
638 }
639 add_stonith_cleanup(event->target);
640 }
641
642
643
644
645
646
647 if (pcmk_is_set(peer->flags, crm_remote_node)) {
648 remote_ra_fail(event->target);
649 }
650
651 crmd_peer_down(peer, TRUE);
652 }
653 }
654
655
656
657
658
659
660
661
662
663
664 gboolean
665 controld_timer_fencer_connect(gpointer user_data)
666 {
667 int rc = pcmk_ok;
668
669 if (stonith_api == NULL) {
670 stonith_api = stonith_api_new();
671 if (stonith_api == NULL) {
672 crm_err("Could not connect to fencer: API memory allocation failed");
673 return G_SOURCE_REMOVE;
674 }
675 }
676
677 if (stonith_api->state != stonith_disconnected) {
678 crm_trace("Already connected to fencer, no need to retry");
679 return G_SOURCE_REMOVE;
680 }
681
682 if (user_data == NULL) {
683
684 rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
685 if (rc != pcmk_ok) {
686 crm_err("Could not connect to fencer in 30 attempts: %s "
687 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
688 }
689 } else {
690
691 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
692
693 if (controld_fencer_connect_timer == NULL) {
694 controld_fencer_connect_timer =
695 mainloop_timer_add("controld_fencer_connect", 1000,
696 TRUE, controld_timer_fencer_connect,
697 GINT_TO_POINTER(TRUE));
698 }
699
700 if (rc != pcmk_ok) {
701 if (pcmk_is_set(controld_globals.fsa_input_register,
702 R_ST_REQUIRED)) {
703 crm_notice("Fencer connection failed (will retry): %s "
704 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
705
706 if (!mainloop_timer_running(controld_fencer_connect_timer)) {
707 mainloop_timer_start(controld_fencer_connect_timer);
708 }
709
710 return G_SOURCE_CONTINUE;
711 } else {
712 crm_info("Fencer connection failed (ignoring because no longer required): %s "
713 CRM_XS " rc=%d", pcmk_strerror(rc), rc);
714 }
715 return G_SOURCE_REMOVE;
716 }
717 }
718
719 if (rc == pcmk_ok) {
720 stonith_api_operations_t *cmds = stonith_api->cmds;
721
722 cmds->register_notification(stonith_api,
723 PCMK__VALUE_ST_NOTIFY_DISCONNECT,
724 tengine_stonith_connection_destroy);
725 cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
726 handle_fence_notification);
727 cmds->register_notification(stonith_api,
728 PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
729 tengine_stonith_history_synced);
730 te_trigger_stonith_history_sync(TRUE);
731 crm_notice("Fencer successfully connected");
732 }
733
734 return G_SOURCE_REMOVE;
735 }
736
737 void
738 controld_disconnect_fencer(bool destroy)
739 {
740 if (stonith_api) {
741
742 controld_clear_fsa_input_flags(R_ST_REQUIRED);
743
744 if (stonith_api->state != stonith_disconnected) {
745 stonith_api->cmds->disconnect(stonith_api);
746 }
747 stonith_api->cmds->remove_notification(stonith_api, NULL);
748 }
749 if (destroy) {
750 if (stonith_api) {
751 stonith_api->cmds->free(stonith_api);
752 stonith_api = NULL;
753 }
754 if (controld_fencer_connect_timer) {
755 mainloop_timer_del(controld_fencer_connect_timer);
756 controld_fencer_connect_timer = NULL;
757 }
758 if (te_client_id) {
759 free(te_client_id);
760 te_client_id = NULL;
761 }
762 }
763 }
764
765 static gboolean
766 do_stonith_history_sync(gpointer user_data)
767 {
768 if (stonith_api && (stonith_api->state != stonith_disconnected)) {
769 stonith_history_t *history = NULL;
770
771 te_cleanup_stonith_history_sync(stonith_api, FALSE);
772 stonith_api->cmds->history(stonith_api,
773 st_opt_sync_call | st_opt_broadcast,
774 NULL, &history, 5);
775 stonith_history_free(history);
776 return TRUE;
777 } else {
778 crm_info("Skip triggering stonith history-sync as stonith is disconnected");
779 return FALSE;
780 }
781 }
782
783 static void
784 tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
785 {
786 char *uuid = NULL;
787 int stonith_id = -1;
788 int transition_id = -1;
789 pcmk__graph_action_t *action = NULL;
790 const char *target = NULL;
791
792 if ((data == NULL) || (data->userdata == NULL)) {
793 crm_err("Ignoring fence operation %d result: "
794 "No transition key given (bug?)",
795 ((data == NULL)? -1 : data->call_id));
796 return;
797 }
798
799 if (!AM_I_DC) {
800 const char *reason = stonith__exit_reason(data);
801
802 if (reason == NULL) {
803 reason = pcmk_exec_status_str(stonith__execution_status(data));
804 }
805 crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
806 data->call_id, stonith__exit_status(data), reason,
807 (const char *) data->userdata);
808 return;
809 }
810
811 CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
812 &stonith_id, NULL),
813 goto bail);
814
815 if (controld_globals.transition_graph->complete || (stonith_id < 0)
816 || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
817 || (controld_globals.transition_graph->id != transition_id)) {
818 crm_info("Ignoring fence operation %d result: "
819 "Not from current transition " CRM_XS
820 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
821 data->call_id,
822 pcmk__btoa(controld_globals.transition_graph->complete),
823 stonith_id, uuid, controld_globals.te_uuid, transition_id,
824 controld_globals.transition_graph->id);
825 goto bail;
826 }
827
828 action = controld_get_action(stonith_id);
829 if (action == NULL) {
830 crm_err("Ignoring fence operation %d result: "
831 "Action %d not found in transition graph (bug?) "
832 CRM_XS " uuid=%s transition=%d",
833 data->call_id, stonith_id, uuid, transition_id);
834 goto bail;
835 }
836
837 target = crm_element_value(action->xml, PCMK__META_ON_NODE);
838 if (target == NULL) {
839 crm_err("Ignoring fence operation %d result: No target given (bug?)",
840 data->call_id);
841 goto bail;
842 }
843
844 stop_te_timer(action);
845 if (stonith__exit_status(data) == CRM_EX_OK) {
846 const char *uuid = crm_element_value(action->xml,
847 PCMK__META_ON_NODE_UUID);
848 const char *op = crm_meta_value(action->params,
849 PCMK__META_STONITH_ACTION);
850
851 crm_info("Fence operation %d for %s succeeded", data->call_id, target);
852 if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
853 te_action_confirmed(action, NULL);
854 if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
855 const char *value = NULL;
856 char *now = pcmk__ttoa(time(NULL));
857 gboolean is_remote_node = FALSE;
858
859
860
861
862
863
864
865
866 if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
867 is_remote_node = TRUE;
868 }
869
870 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
871 is_remote_node);
872 free(now);
873
874 value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
875 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
876 is_remote_node);
877
878 value = crm_meta_value(action->params,
879 PCMK__META_DIGESTS_SECURE);
880 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
881 is_remote_node);
882
883 } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
884 send_stonith_update(action, target, uuid);
885 pcmk__set_graph_action_flags(action,
886 pcmk__graph_action_sent_update);
887 }
888 }
889 st_fail_count_reset(target);
890
891 } else {
892 enum pcmk__graph_next abort_action = pcmk__graph_restart;
893 int status = stonith__execution_status(data);
894 const char *reason = stonith__exit_reason(data);
895
896 if (reason == NULL) {
897 if (status == PCMK_EXEC_DONE) {
898 reason = "Agent returned error";
899 } else {
900 reason = pcmk_exec_status_str(status);
901 }
902 }
903 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
904
905
906
907
908 if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
909 crm_warn("Fence operation %d for %s failed: %s "
910 "(aborting transition and giving up for now)",
911 data->call_id, target, reason);
912 abort_action = pcmk__graph_wait;
913 } else {
914 crm_notice("Fence operation %d for %s failed: %s "
915 "(aborting transition)", data->call_id, target, reason);
916 }
917
918
919
920
921
922 st_fail_count_increment(target);
923 abort_for_stonith_failure(abort_action, target, NULL);
924 }
925
926 pcmk__update_graph(controld_globals.transition_graph, action);
927 trigger_graph();
928
929 bail:
930 free(data->userdata);
931 free(uuid);
932 return;
933 }
934
935 static int
936 fence_with_delay(const char *target, const char *type, int delay)
937 {
938 uint32_t options = st_opt_none;
939 int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
940 / 1000);
941
942 if (crmd_join_phase_count(crm_join_confirmed) == 1) {
943 stonith__set_call_options(options, target, st_opt_allow_suicide);
944 }
945 return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
946 type, timeout_sec, 0, delay);
947 }
948
949
950
951
952
953
954
955
956
957
958 int
959 controld_execute_fence_action(pcmk__graph_t *graph,
960 pcmk__graph_action_t *action)
961 {
962 int rc = 0;
963 const char *id = pcmk__xe_id(action->xml);
964 const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
965 const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
966 const char *type = crm_meta_value(action->params,
967 PCMK__META_STONITH_ACTION);
968 char *transition_key = NULL;
969 const char *priority_delay = NULL;
970 int delay_i = 0;
971 gboolean invalid_action = FALSE;
972 int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
973 / 1000);
974
975 CRM_CHECK(id != NULL, invalid_action = TRUE);
976 CRM_CHECK(uuid != NULL, invalid_action = TRUE);
977 CRM_CHECK(type != NULL, invalid_action = TRUE);
978 CRM_CHECK(target != NULL, invalid_action = TRUE);
979
980 if (invalid_action) {
981 crm_log_xml_warn(action->xml, "BadAction");
982 return EPROTO;
983 }
984
985 priority_delay = crm_meta_value(action->params,
986 PCMK_OPT_PRIORITY_FENCING_DELAY);
987
988 crm_notice("Requesting fencing (%s) targeting node %s "
989 CRM_XS " action=%s timeout=%i%s%s",
990 type, target, id, stonith_timeout,
991 priority_delay ? " priority_delay=" : "",
992 priority_delay ? priority_delay : "");
993
994
995 controld_timer_fencer_connect(NULL);
996
997 pcmk__scan_min_int(priority_delay, &delay_i, 0);
998 rc = fence_with_delay(target, type, delay_i);
999 transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
1000 action->id, 0,
1001 controld_globals.te_uuid),
1002 stonith_api->cmds->register_callback(stonith_api, rc,
1003 (stonith_timeout
1004 + (delay_i > 0 ? delay_i : 0)),
1005 st_opt_timeout_updates, transition_key,
1006 "tengine_stonith_callback",
1007 tengine_stonith_callback);
1008 return pcmk_rc_ok;
1009 }
1010
1011 bool
1012 controld_verify_stonith_watchdog_timeout(const char *value)
1013 {
1014 long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
1015 const char *our_nodename = controld_globals.our_nodename;
1016
1017 if (st_timeout == 0
1018 || (stonith_api && (stonith_api->state != stonith_disconnected) &&
1019 stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
1020 our_nodename))) {
1021 return pcmk__valid_stonith_watchdog_timeout(value);
1022 }
1023 return true;
1024 }
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036 static crm_trigger_t *stonith_history_sync_trigger = NULL;
1037 static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
1038 static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
1039
1040 void
1041 te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
1042 {
1043 if (free_timers) {
1044 mainloop_timer_del(stonith_history_sync_timer_short);
1045 stonith_history_sync_timer_short = NULL;
1046 mainloop_timer_del(stonith_history_sync_timer_long);
1047 stonith_history_sync_timer_long = NULL;
1048 } else {
1049 mainloop_timer_stop(stonith_history_sync_timer_short);
1050 mainloop_timer_stop(stonith_history_sync_timer_long);
1051 }
1052
1053 if (st) {
1054 st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
1055 }
1056 }
1057
1058 static void
1059 tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
1060 {
1061 te_cleanup_stonith_history_sync(st, FALSE);
1062 crm_debug("Fence-history synced - cancel all timers");
1063 }
1064
1065 static gboolean
1066 stonith_history_sync_set_trigger(gpointer user_data)
1067 {
1068 mainloop_set_trigger(stonith_history_sync_trigger);
1069 return FALSE;
1070 }
1071
1072 void
1073 te_trigger_stonith_history_sync(bool long_timeout)
1074 {
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092 if (stonith_history_sync_trigger == NULL) {
1093 stonith_history_sync_trigger =
1094 mainloop_add_trigger(G_PRIORITY_LOW,
1095 do_stonith_history_sync, NULL);
1096 }
1097
1098 if (long_timeout) {
1099 if(stonith_history_sync_timer_long == NULL) {
1100 stonith_history_sync_timer_long =
1101 mainloop_timer_add("history_sync_long", 30000,
1102 FALSE, stonith_history_sync_set_trigger,
1103 NULL);
1104 }
1105 crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
1106 mainloop_timer_start(stonith_history_sync_timer_long);
1107 } else {
1108 if(stonith_history_sync_timer_short == NULL) {
1109 stonith_history_sync_timer_short =
1110 mainloop_timer_add("history_sync_short", 5000,
1111 FALSE, stonith_history_sync_set_trigger,
1112 NULL);
1113 }
1114 crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
1115 mainloop_timer_start(stonith_history_sync_timer_short);
1116 }
1117
1118 }
1119
1120