This source file includes the following definitions.
- sort_strings
- free_remote_query
- free_stonith_remote_op_list
- count_peer_device
- count_peer_devices
- find_peer_device
- grab_peer_device
- clear_remote_op_timers
- free_remote_op
- init_stonith_remote_op_hash_table
- op_requested_action
- op_phase_off
- op_phase_on
- undo_op_remap
- fencing_result2xml
- fenced_broadcast_op_result
- handle_local_reply_and_notify
- finalize_op_duplicates
- delegate_from_xml
- finalize_op
- remote_op_watchdog_done
- remote_op_timeout_one
- finalize_timed_out_op
- remote_op_timeout
- remote_op_query_timeout
- topology_is_empty
- add_required_device
- remove_required_device
- set_op_device_list
- topology_matches
- find_topology_for_host
- advance_topology_level
- merge_duplicates
- fencing_active_peers
- fenced_handle_manual_confirmation
- create_remote_stonith_op
- initiate_remote_stonith_op
- find_best_peer
- stonith_choose_peer
- get_device_timeout
- add_device_timeout
- get_peer_timeout
- get_op_total_timeout
- report_timeout_period
- advance_topology_device_in_level
- check_watchdog_fencing_and_wait
- request_peer_fencing
- sort_peers
- all_topology_devices_found
- parse_action_specific
- add_device_properties
- add_result
- process_remote_stonith_query
- fenced_process_fencing_reply
- stonith_check_fence_tolerance
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdio.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <sys/utsname.h>
19
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <ctype.h>
24 #include <regex.h>
25
26 #include <crm/crm.h>
27 #include <crm/msg_xml.h>
28 #include <crm/common/ipc.h>
29 #include <crm/common/ipc_internal.h>
30 #include <crm/cluster/internal.h>
31
32 #include <crm/stonith-ng.h>
33 #include <crm/fencing/internal.h>
34 #include <crm/common/xml.h>
35 #include <crm/common/xml_internal.h>
36
37 #include <crm/common/util.h>
38 #include <pacemaker-fenced.h>
39
/* Safety margin applied to timeouts in this file: the device-query timer and
 * the best-guess total timeout of a merged operation are both scaled by it.
 */
#define TIMEOUT_MULTIPLY_FACTOR 1.2
41
42
43
44
45
46
47
48 typedef struct device_properties_s {
49
50 gboolean verified;
51
52
53
54
55 gboolean executed[st_phase_max];
56
57 gboolean disallowed[st_phase_max];
58
59 int custom_action_timeout[st_phase_max];
60
61 int delay_max[st_phase_max];
62
63 int delay_base[st_phase_max];
64
65 uint32_t device_support_flags;
66 } device_properties_t;
67
68 typedef struct {
69
70 char *host;
71
72 gboolean tried;
73
74 int ndevices;
75
76 GHashTable *devices;
77 } peer_device_info_t;
78
79 GHashTable *stonith_remote_op_list = NULL;
80
81 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
82 int call_options);
83
84 static void request_peer_fencing(remote_fencing_op_t *op,
85 peer_device_info_t *peer);
86 static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
87 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
88 static int get_op_total_timeout(const remote_fencing_op_t *op,
89 const peer_device_info_t *chosen_peer);
90
91 static gint
92 sort_strings(gconstpointer a, gconstpointer b)
93 {
94 return strcmp(a, b);
95 }
96
97 static void
98 free_remote_query(gpointer data)
99 {
100 if (data != NULL) {
101 peer_device_info_t *peer = data;
102
103 g_hash_table_destroy(peer->devices);
104 free(peer->host);
105 free(peer);
106 }
107 }
108
109 void
110 free_stonith_remote_op_list(void)
111 {
112 if (stonith_remote_op_list != NULL) {
113 g_hash_table_destroy(stonith_remote_op_list);
114 stonith_remote_op_list = NULL;
115 }
116 }
117
118 struct peer_count_data {
119 const remote_fencing_op_t *op;
120 gboolean verified_only;
121 uint32_t support_action_only;
122 int count;
123 };
124
125
126
127
128
129
130
131
132
133 static void
134 count_peer_device(gpointer key, gpointer value, gpointer user_data)
135 {
136 device_properties_t *props = (device_properties_t*)value;
137 struct peer_count_data *data = user_data;
138
139 if (!props->executed[data->op->phase]
140 && (!data->verified_only || props->verified)
141 && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
142 ++(data->count);
143 }
144 }
145
146
147
148
149
150
151
152
153
154
155
156
157 static int
158 count_peer_devices(const remote_fencing_op_t *op,
159 const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
160 {
161 struct peer_count_data data;
162
163 data.op = op;
164 data.verified_only = verified_only;
165 data.support_action_only = support_on_action_only;
166 data.count = 0;
167 if (peer) {
168 g_hash_table_foreach(peer->devices, count_peer_device, &data);
169 }
170 return data.count;
171 }
172
173
174
175
176
177
178
179
180
181
182
183 static device_properties_t *
184 find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
185 const char *device, uint32_t support_action_only)
186 {
187 device_properties_t *props = g_hash_table_lookup(peer->devices, device);
188
189 if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
190 return NULL;
191 }
192 return (props && !props->executed[op->phase]
193 && !props->disallowed[op->phase])? props : NULL;
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207 static gboolean
208 grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
209 const char *device, gboolean verified_devices_only)
210 {
211 device_properties_t *props = find_peer_device(op, peer, device,
212 fenced_support_flag(op->action));
213
214 if ((props == NULL) || (verified_devices_only && !props->verified)) {
215 return FALSE;
216 }
217
218 crm_trace("Removing %s from %s (%d remaining)",
219 device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
220 props->executed[op->phase] = TRUE;
221 return TRUE;
222 }
223
224 static void
225 clear_remote_op_timers(remote_fencing_op_t * op)
226 {
227 if (op->query_timer) {
228 g_source_remove(op->query_timer);
229 op->query_timer = 0;
230 }
231 if (op->op_timer_total) {
232 g_source_remove(op->op_timer_total);
233 op->op_timer_total = 0;
234 }
235 if (op->op_timer_one) {
236 g_source_remove(op->op_timer_one);
237 op->op_timer_one = 0;
238 }
239 }
240
241 static void
242 free_remote_op(gpointer data)
243 {
244 remote_fencing_op_t *op = data;
245
246 crm_log_xml_debug(op->request, "Destroying");
247
248 clear_remote_op_timers(op);
249
250 free(op->id);
251 free(op->action);
252 free(op->delegate);
253 free(op->target);
254 free(op->client_id);
255 free(op->client_name);
256 free(op->originator);
257
258 if (op->query_results) {
259 g_list_free_full(op->query_results, free_remote_query);
260 }
261 if (op->request) {
262 free_xml(op->request);
263 op->request = NULL;
264 }
265 if (op->devices_list) {
266 g_list_free_full(op->devices_list, free);
267 op->devices_list = NULL;
268 }
269 g_list_free_full(op->automatic_list, free);
270 g_list_free(op->duplicates);
271
272 pcmk__reset_result(&op->result);
273 free(op);
274 }
275
276 void
277 init_stonith_remote_op_hash_table(GHashTable **table)
278 {
279 if (*table == NULL) {
280 *table = pcmk__strkey_table(NULL, free_remote_op);
281 }
282 }
283
284
285
286
287
288
289
290
291
292 static const char *
293 op_requested_action(const remote_fencing_op_t *op)
294 {
295 return ((op->phase > st_phase_requested)? "reboot" : op->action);
296 }
297
298
299
300
301
302
303
304 static void
305 op_phase_off(remote_fencing_op_t *op)
306 {
307 crm_info("Remapping multiple-device reboot targeting %s to 'off' "
308 CRM_XS " id=%.8s", op->target, op->id);
309 op->phase = st_phase_off;
310
311
312
313
314 strcpy(op->action, "off");
315 }
316
317
318
319
320
321
322
323 static void
324 op_phase_on(remote_fencing_op_t *op)
325 {
326 GList *iter = NULL;
327
328 crm_info("Remapped 'off' targeting %s complete, "
329 "remapping to 'on' for %s " CRM_XS " id=%.8s",
330 op->target, op->client_name, op->id);
331 op->phase = st_phase_on;
332 strcpy(op->action, "on");
333
334
335
336
337 for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
338 GList *match = g_list_find_custom(op->devices_list, iter->data,
339 sort_strings);
340
341 if (match) {
342 op->devices_list = g_list_remove(op->devices_list, match->data);
343 }
344 }
345 g_list_free_full(op->automatic_list, free);
346 op->automatic_list = NULL;
347
348
349 op->devices = op->devices_list;
350 }
351
352
353
354
355
356
357
358 static void
359 undo_op_remap(remote_fencing_op_t *op)
360 {
361 if (op->phase > 0) {
362 crm_info("Undoing remap of reboot targeting %s for %s "
363 CRM_XS " id=%.8s", op->target, op->client_name, op->id);
364 op->phase = st_phase_requested;
365 strcpy(op->action, "reboot");
366 }
367 }
368
369
370
371
372
373
374
375
376
377
378 static xmlNode *
379 fencing_result2xml(const remote_fencing_op_t *op)
380 {
381 xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
382
383 crm_xml_add_int(notify_data, "state", op->state);
384 crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
385 crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
386 crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
387 crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
388 crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
389 crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
390 crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
391
392 return notify_data;
393 }
394
395
396
397
398
399
400
401
402 void
403 fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
404 {
405 static int count = 0;
406 xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
407 xmlNode *notify_data = fencing_result2xml(op);
408
409 count++;
410 crm_trace("Broadcasting result to peers");
411 crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
412 crm_xml_add(bcast, F_SUBTYPE, "broadcast");
413 crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
414 crm_xml_add_int(bcast, "count", count);
415
416 if (op_merged) {
417 pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true);
418 }
419
420 stonith__xe_set_result(notify_data, &op->result);
421
422 add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
423 send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
424 free_xml(notify_data);
425 free_xml(bcast);
426
427 return;
428 }
429
430
431
432
433
434
435
436
437 static void
438 handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
439 {
440 xmlNode *notify_data = NULL;
441 xmlNode *reply = NULL;
442 pcmk__client_t *client = NULL;
443
444 if (op->notify_sent == TRUE) {
445
446 return;
447 }
448
449
450 crm_xml_add_int(data, "state", op->state);
451 crm_xml_add(data, F_STONITH_TARGET, op->target);
452 crm_xml_add(data, F_STONITH_OPERATION, op->action);
453
454 reply = fenced_construct_reply(op->request, data, &op->result);
455 crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
456
457
458 client = pcmk__find_client_by_id(op->client_id);
459 if (client == NULL) {
460 crm_trace("Skipping reply to %s: no longer a client", op->client_id);
461 } else {
462 do_local_reply(reply, client, op->call_options);
463 }
464
465
466 notify_data = fencing_result2xml(op);
467 fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
468 free_xml(notify_data);
469 fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
470
471
472 op->notify_sent = TRUE;
473 free_xml(reply);
474 }
475
476
477
478
479
480
481
482
483 static void
484 finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
485 {
486 for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
487 remote_fencing_op_t *other = iter->data;
488
489 if (other->state == st_duplicate) {
490 other->state = op->state;
491 crm_debug("Performing duplicate notification for %s@%s: %s "
492 CRM_XS " id=%.8s",
493 other->client_name, other->originator,
494 pcmk_exec_status_str(op->result.execution_status),
495 other->id);
496 pcmk__copy_result(&op->result, &other->result);
497 finalize_op(other, data, true);
498
499 } else {
500
501 crm_err("Skipping duplicate notification for %s@%s "
502 CRM_XS " state=%s id=%.8s",
503 other->client_name, other->originator,
504 stonith_op_state_str(other->state), other->id);
505 }
506 }
507 }
508
509 static char *
510 delegate_from_xml(xmlNode *xml)
511 {
512 xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER);
513
514 if (match == NULL) {
515 return crm_element_value_copy(xml, F_ORIG);
516 } else {
517 return crm_element_value_copy(match, F_STONITH_DELEGATE);
518 }
519 }
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537 static void
538 finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
539 {
540 int level = LOG_ERR;
541 const char *subt = NULL;
542 xmlNode *local_data = NULL;
543 gboolean op_merged = FALSE;
544
545 CRM_CHECK((op != NULL), return);
546
547 if (op->notify_sent) {
548
549 crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
550 "Result arrived too late " CRM_XS " id=%.8s",
551 op->action, (op->target? " targeting " : ""),
552 (op->target? op->target : ""),
553 (op->delegate? op->delegate : "unknown node"),
554 op->client_name, op->originator,
555 (op_merged? " (merged)" : ""),
556 op->id);
557 return;
558 }
559
560 set_fencing_completed(op);
561 clear_remote_op_timers(op);
562 undo_op_remap(op);
563
564 if (data == NULL) {
565 data = create_xml_node(NULL, "remote-op");
566 local_data = data;
567
568 } else if (op->delegate == NULL) {
569 switch (op->result.execution_status) {
570 case PCMK_EXEC_NO_FENCE_DEVICE:
571 break;
572
573 case PCMK_EXEC_INVALID:
574 if (op->result.exit_status != CRM_EX_EXPIRED) {
575 op->delegate = delegate_from_xml(data);
576 }
577 break;
578
579 default:
580 op->delegate = delegate_from_xml(data);
581 break;
582 }
583 }
584
585 if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
586 op_merged = true;
587 }
588
589
590
591
592 subt = crm_element_value(data, F_SUBTYPE);
593 if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
594
595 fenced_broadcast_op_result(op, op_merged);
596 free_xml(local_data);
597 return;
598 }
599
600 if (pcmk__result_ok(&op->result) || dup
601 || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
602 level = LOG_NOTICE;
603 }
604 do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
605 CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
606 (op->target? op->target : ""),
607 (op->delegate? op->delegate : "unknown node"),
608 op->client_name, op->originator,
609 (op_merged? " (merged)" : ""),
610 crm_exit_str(op->result.exit_status),
611 pcmk_exec_status_str(op->result.execution_status),
612 ((op->result.exit_reason == NULL)? "" : ": "),
613 ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
614 op->id);
615
616 handle_local_reply_and_notify(op, data);
617
618 if (!dup) {
619 finalize_op_duplicates(op, data);
620 }
621
622
623
624
625 if (op->query_results) {
626 g_list_free_full(op->query_results, free_remote_query);
627 op->query_results = NULL;
628 }
629 if (op->request) {
630 free_xml(op->request);
631 op->request = NULL;
632 }
633
634 free_xml(local_data);
635 }
636
637
638
639
640
641
642
643
644
645 static gboolean
646 remote_op_watchdog_done(gpointer userdata)
647 {
648 remote_fencing_op_t *op = userdata;
649
650 op->op_timer_one = 0;
651
652 crm_notice("Self-fencing (%s) by %s for %s assumed complete "
653 CRM_XS " id=%.8s",
654 op->action, op->target, op->client_name, op->id);
655 op->state = st_done;
656 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
657 finalize_op(op, NULL, false);
658 return G_SOURCE_REMOVE;
659 }
660
661 static gboolean
662 remote_op_timeout_one(gpointer userdata)
663 {
664 remote_fencing_op_t *op = userdata;
665
666 op->op_timer_one = 0;
667
668 crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
669 " id=%.8s", op->action, op->target, op->client_name, op->id);
670 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
671 "Peer did not return fence result within timeout");
672
673
674 request_peer_fencing(op, NULL);
675 return FALSE;
676 }
677
678
679
680
681
682
683
684
685 static void
686 finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
687 {
688 op->op_timer_total = 0;
689
690 crm_debug("Action '%s' targeting %s for client %s timed out "
691 CRM_XS " id=%.8s",
692 op->action, op->target, op->client_name, op->id);
693
694 if (op->phase == st_phase_on) {
695
696
697
698
699 op->state = st_done;
700 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
701 } else {
702 op->state = st_failed;
703 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
704 }
705 finalize_op(op, NULL, false);
706 }
707
708
709
710
711
712
713
714
715
716 static gboolean
717 remote_op_timeout(gpointer userdata)
718 {
719 remote_fencing_op_t *op = userdata;
720
721 if (op->state == st_done) {
722 crm_debug("Action '%s' targeting %s for client %s already completed "
723 CRM_XS " id=%.8s",
724 op->action, op->target, op->client_name, op->id);
725 } else {
726 finalize_timed_out_op(userdata, "Fencing did not complete within a "
727 "total timeout based on the "
728 "configured timeout and retries for "
729 "any devices attempted");
730 }
731 return G_SOURCE_REMOVE;
732 }
733
734 static gboolean
735 remote_op_query_timeout(gpointer data)
736 {
737 remote_fencing_op_t *op = data;
738
739 op->query_timer = 0;
740 if (op->state == st_done) {
741 crm_debug("Operation %.8s targeting %s already completed",
742 op->id, op->target);
743 } else if (op->state == st_exec) {
744 crm_debug("Operation %.8s targeting %s already in progress",
745 op->id, op->target);
746 } else if (op->query_results) {
747
748 crm_debug("Query %.8s targeting %s complete (state=%s)",
749 op->id, op->target, stonith_op_state_str(op->state));
750 request_peer_fencing(op, NULL);
751 } else {
752 crm_debug("Query %.8s targeting %s timed out (state=%s)",
753 op->id, op->target, stonith_op_state_str(op->state));
754 if (op->op_timer_total) {
755 g_source_remove(op->op_timer_total);
756 op->op_timer_total = 0;
757 }
758 finalize_timed_out_op(op, "No capable peers replied to device query "
759 "within timeout");
760 }
761
762 return FALSE;
763 }
764
765 static gboolean
766 topology_is_empty(stonith_topology_t *tp)
767 {
768 int i;
769
770 if (tp == NULL) {
771 return TRUE;
772 }
773
774 for (i = 0; i < ST_LEVEL_MAX; i++) {
775 if (tp->levels[i] != NULL) {
776 return FALSE;
777 }
778 }
779 return TRUE;
780 }
781
782
783
784
785
786
787
788
789 static void
790 add_required_device(remote_fencing_op_t *op, const char *device)
791 {
792 GList *match = g_list_find_custom(op->automatic_list, device,
793 sort_strings);
794
795 if (!match) {
796 op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
797 }
798 }
799
800
801
802
803
804
805
806
807 static void
808 remove_required_device(remote_fencing_op_t *op, const char *device)
809 {
810 GList *match = g_list_find_custom(op->automatic_list, device,
811 sort_strings);
812
813 if (match) {
814 op->automatic_list = g_list_remove(op->automatic_list, match->data);
815 }
816 }
817
818
819 static void
820 set_op_device_list(remote_fencing_op_t * op, GList *devices)
821 {
822 GList *lpc = NULL;
823
824 if (op->devices_list) {
825 g_list_free_full(op->devices_list, free);
826 op->devices_list = NULL;
827 }
828 for (lpc = devices; lpc != NULL; lpc = lpc->next) {
829 op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
830 }
831 op->devices = op->devices_list;
832 }
833
834
835
836
837
838
839
840
841
842
843 static gboolean
844 topology_matches(const stonith_topology_t *tp, const char *node)
845 {
846 regex_t r_patt;
847
848 CRM_CHECK(node && tp && tp->target, return FALSE);
849 switch (tp->kind) {
850 case fenced_target_by_attribute:
851
852
853
854
855
856
857 if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
858 crm_notice("Matched %s with %s by attribute", node, tp->target);
859 return TRUE;
860 }
861 break;
862
863 case fenced_target_by_pattern:
864
865
866
867 if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
868 crm_info("Bad regex '%s' for fencing level", tp->target);
869 } else {
870 int status = regexec(&r_patt, node, 0, NULL, 0);
871
872 regfree(&r_patt);
873 if (status == 0) {
874 crm_notice("Matched %s with %s by name", node, tp->target);
875 return TRUE;
876 }
877 }
878 break;
879
880 case fenced_target_by_name:
881 crm_trace("Testing %s against %s", node, tp->target);
882 return pcmk__str_eq(tp->target, node, pcmk__str_casei);
883
884 default:
885 break;
886 }
887 crm_trace("No match for %s with %s", node, tp->target);
888 return FALSE;
889 }
890
891 stonith_topology_t *
892 find_topology_for_host(const char *host)
893 {
894 GHashTableIter tIter;
895 stonith_topology_t *tp = g_hash_table_lookup(topology, host);
896
897 if(tp != NULL) {
898 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
899 return tp;
900 }
901
902 g_hash_table_iter_init(&tIter, topology);
903 while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
904 if (topology_matches(tp, host)) {
905 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
906 return tp;
907 }
908 }
909
910 crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
911 return NULL;
912 }
913
914
915
916
917
918
919
920
921
922
923
924
925 static int
926 advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
927 {
928 stonith_topology_t *tp = NULL;
929
930 if (op->target) {
931 tp = find_topology_for_host(op->target);
932 }
933 if (topology_is_empty(tp)) {
934 return empty_ok? pcmk_rc_ok : ENODEV;
935 }
936
937 CRM_ASSERT(tp->levels != NULL);
938
939 stonith__set_call_options(op->call_options, op->id, st_opt_topology);
940
941
942 undo_op_remap(op);
943
944 do {
945 op->level++;
946
947 } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
948
949 if (op->level < ST_LEVEL_MAX) {
950 crm_trace("Attempting fencing level %d targeting %s (%d devices) "
951 "for client %s@%s (id=%.8s)",
952 op->level, op->target, g_list_length(tp->levels[op->level]),
953 op->client_name, op->originator, op->id);
954 set_op_device_list(op, tp->levels[op->level]);
955
956
957 if (op->level > 1 && op->delay > 0) {
958 op->delay = 0;
959 }
960
961 if ((g_list_next(op->devices_list) != NULL)
962 && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) {
963
964
965
966
967
968 op_phase_off(op);
969 }
970 return pcmk_rc_ok;
971 }
972
973 crm_info("All fencing options targeting %s for client %s@%s failed "
974 CRM_XS " id=%.8s",
975 op->target, op->client_name, op->originator, op->id);
976 return ENODEV;
977 }
978
979
980
981
982
983
984
985 static void
986 merge_duplicates(remote_fencing_op_t *op)
987 {
988 GHashTableIter iter;
989 remote_fencing_op_t *other = NULL;
990
991 time_t now = time(NULL);
992
993 g_hash_table_iter_init(&iter, stonith_remote_op_list);
994 while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
995 const char *other_action = op_requested_action(other);
996
997 if (!strcmp(op->id, other->id)) {
998 continue;
999 }
1000 if (other->state > st_exec) {
1001 crm_trace("%.8s not duplicate of %.8s: not in progress",
1002 op->id, other->id);
1003 continue;
1004 }
1005 if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
1006 crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
1007 op->id, other->id, op->target, other->target);
1008 continue;
1009 }
1010 if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
1011 crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
1012 op->id, other->id, op->action, other_action);
1013 continue;
1014 }
1015 if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
1016 crm_trace("%.8s not duplicate of %.8s: same client %s",
1017 op->id, other->id, op->client_name);
1018 continue;
1019 }
1020 if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
1021 crm_trace("%.8s not duplicate of %.8s: suicide for %s",
1022 op->id, other->id, other->target);
1023 continue;
1024 }
1025 if (!fencing_peer_active(crm_get_peer(0, other->originator))) {
1026 crm_notice("Failing action '%s' targeting %s originating from "
1027 "client %s@%s: Originator is dead " CRM_XS " id=%.8s",
1028 other->action, other->target, other->client_name,
1029 other->originator, other->id);
1030 crm_trace("%.8s not duplicate of %.8s: originator dead",
1031 op->id, other->id);
1032 other->state = st_failed;
1033 continue;
1034 }
1035 if ((other->total_timeout > 0)
1036 && (now > (other->total_timeout + other->created))) {
1037 crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)",
1038 op->id, other->id, now, other->created,
1039 other->total_timeout);
1040 continue;
1041 }
1042
1043
1044
1045
1046 other->duplicates = g_list_append(other->duplicates, op);
1047 if (other->total_timeout == 0) {
1048 other->total_timeout = op->total_timeout =
1049 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
1050 crm_trace("Best guess as to timeout used for %.8s: %d",
1051 other->id, other->total_timeout);
1052 }
1053 crm_notice("Merging fencing action '%s' targeting %s originating from "
1054 "client %s with identical request from %s@%s "
1055 CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
1056 op->action, op->target, op->client_name,
1057 other->client_name, other->originator,
1058 op->id, other->id, other->total_timeout);
1059 report_timeout_period(op, other->total_timeout);
1060 op->state = st_duplicate;
1061 }
1062 }
1063
1064 static uint32_t fencing_active_peers(void)
1065 {
1066 uint32_t count = 0;
1067 crm_node_t *entry;
1068 GHashTableIter gIter;
1069
1070 g_hash_table_iter_init(&gIter, crm_peer_cache);
1071 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1072 if(fencing_peer_active(entry)) {
1073 count++;
1074 }
1075 }
1076 return count;
1077 }
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088 int
1089 fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
1090 {
1091 remote_fencing_op_t *op = NULL;
1092 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
1093
1094 CRM_CHECK(dev != NULL, return EPROTO);
1095
1096 crm_notice("Received manual confirmation that %s has been fenced",
1097 pcmk__s(crm_element_value(dev, F_STONITH_TARGET),
1098 "unknown target"));
1099 op = initiate_remote_stonith_op(client, msg, TRUE);
1100 if (op == NULL) {
1101 return EPROTO;
1102 }
1103 op->state = st_done;
1104 set_fencing_completed(op);
1105 op->delegate = strdup("a human");
1106
1107
1108 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1109 finalize_op(op, msg, false);
1110
1111
1112
1113
1114 return EINPROGRESS;
1115 }
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128 void *
1129 create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
1130 {
1131 remote_fencing_op_t *op = NULL;
1132 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER);
1133 int call_options = 0;
1134 const char *operation = NULL;
1135
1136 init_stonith_remote_op_hash_table(&stonith_remote_op_list);
1137
1138
1139
1140 if (peer && dev) {
1141 const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1142
1143 CRM_CHECK(op_id != NULL, return NULL);
1144
1145 op = g_hash_table_lookup(stonith_remote_op_list, op_id);
1146 if (op) {
1147 crm_debug("Reusing existing remote fencing op %.8s for %s",
1148 op_id, ((client == NULL)? "unknown client" : client));
1149 return op;
1150 }
1151 }
1152
1153 op = calloc(1, sizeof(remote_fencing_op_t));
1154 CRM_ASSERT(op != NULL);
1155
1156 crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1157
1158 crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
1159
1160 if (peer && dev) {
1161 op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
1162 } else {
1163 op->id = crm_generate_uuid();
1164 }
1165
1166 g_hash_table_replace(stonith_remote_op_list, op->id, op);
1167
1168 op->state = st_query;
1169 op->replies_expected = fencing_active_peers();
1170 op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
1171 op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
1172 op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE);
1173 op->created = time(NULL);
1174
1175 if (op->originator == NULL) {
1176
1177 op->originator = strdup(stonith_our_uname);
1178 }
1179
1180 CRM_LOG_ASSERT(client != NULL);
1181 if (client) {
1182 op->client_id = strdup(client);
1183 }
1184
1185
1186
1187 operation = crm_element_value(request, F_STONITH_OPERATION);
1188
1189 if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1190 op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
1191 (unsigned long) getpid());
1192 } else {
1193 op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
1194 }
1195
1196 op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
1197 op->request = copy_xml(request);
1198 crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
1199 op->call_options = call_options;
1200
1201 crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
1202
1203 crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
1204 "base timeout %d, %u %s expected)",
1205 (peer && dev)? "Recorded" : "Generated", op->id, op->action,
1206 op->target, op->client_name, op->base_timeout,
1207 op->replies_expected,
1208 pcmk__plural_alt(op->replies_expected, "reply", "replies"));
1209
1210 if (op->call_options & st_opt_cs_nodeid) {
1211 int nodeid;
1212 crm_node_t *node;
1213
1214 pcmk__scan_min_int(op->target, &nodeid, 0);
1215 node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
1216
1217
1218 stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
1219
1220 if (node && node->uname) {
1221 free(op->target);
1222 op->target = strdup(node->uname);
1223
1224 } else {
1225 crm_warn("Could not expand nodeid '%s' into a host name", op->target);
1226 }
1227 }
1228
1229
1230 merge_duplicates(op);
1231
1232 if (op->state != st_duplicate) {
1233
1234 fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
1235 }
1236
1237
1238 stonith_fence_history_trim();
1239
1240 return op;
1241 }
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253 remote_fencing_op_t *
1254 initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
1255 gboolean manual_ack)
1256 {
1257 int query_timeout = 0;
1258 xmlNode *query = NULL;
1259 const char *client_id = NULL;
1260 remote_fencing_op_t *op = NULL;
1261 const char *relay_op_id = NULL;
1262 const char *operation = NULL;
1263
1264 if (client) {
1265 client_id = client->id;
1266 } else {
1267 client_id = crm_element_value(request, F_STONITH_CLIENTID);
1268 }
1269
1270 CRM_LOG_ASSERT(client_id != NULL);
1271 op = create_remote_stonith_op(client_id, request, FALSE);
1272 op->owner = TRUE;
1273 if (manual_ack) {
1274 return op;
1275 }
1276
1277 CRM_CHECK(op->action, return NULL);
1278
1279 if (advance_topology_level(op, true) != pcmk_rc_ok) {
1280 op->state = st_failed;
1281 }
1282
1283 switch (op->state) {
1284 case st_failed:
1285
1286 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
1287 "All topology levels failed");
1288 crm_warn("Could not request peer fencing (%s) targeting %s "
1289 CRM_XS " id=%.8s", op->action, op->target, op->id);
1290 finalize_op(op, NULL, false);
1291 return op;
1292
1293 case st_duplicate:
1294 crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
1295 CRM_XS " id=%.8s", op->action, op->target, op->id);
1296 return op;
1297
1298 default:
1299 crm_notice("Requesting peer fencing (%s) targeting %s "
1300 CRM_XS " id=%.8s state=%s base_timeout=%d",
1301 op->action, op->target, op->id,
1302 stonith_op_state_str(op->state), op->base_timeout);
1303 }
1304
1305 query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
1306 NULL, op->call_options);
1307
1308 crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
1309 crm_xml_add(query, F_STONITH_TARGET, op->target);
1310 crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
1311 crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
1312 crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
1313 crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
1314 crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
1315
1316
1317 operation = crm_element_value(request, F_STONITH_OPERATION);
1318 if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1319 relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
1320 if (relay_op_id) {
1321 crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
1322 }
1323 }
1324
1325 send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
1326 free_xml(query);
1327
1328 query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
1329 op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
1330
1331 return op;
1332 }
1333
/* Bit flags restricting which query results find_best_peer() may return */
enum find_best_peer_options {
    /* Never return the peer whose host name matches the fencing target */
    FIND_PEER_SKIP_TARGET   = (1 << 0),

    /* Only return the peer whose host name matches the fencing target */
    FIND_PEER_TARGET_ONLY   = (1 << 1),

    /* Only take verified devices into account when matching a peer */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
1342
1343 static peer_device_info_t *
1344 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
1345 {
1346 GList *iter = NULL;
1347 gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1348
1349 if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
1350 return NULL;
1351 }
1352
1353 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1354 peer_device_info_t *peer = iter->data;
1355
1356 crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
1357 peer->host, op->target, peer->ndevices,
1358 pcmk__plural_s(peer->ndevices), peer->tried, options);
1359 if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1360 continue;
1361 }
1362 if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1363 continue;
1364 }
1365
1366 if (pcmk_is_set(op->call_options, st_opt_topology)) {
1367
1368 if (grab_peer_device(op, peer, device, verified_devices_only)) {
1369 return peer;
1370 }
1371
1372 } else if (!peer->tried
1373 && count_peer_devices(op, peer, verified_devices_only,
1374 fenced_support_flag(op->action))) {
1375
1376 crm_trace("Simple fencing");
1377 return peer;
1378 }
1379 }
1380
1381 return NULL;
1382 }
1383
1384 static peer_device_info_t *
1385 stonith_choose_peer(remote_fencing_op_t * op)
1386 {
1387 const char *device = NULL;
1388 peer_device_info_t *peer = NULL;
1389 uint32_t active = fencing_active_peers();
1390
1391 do {
1392 if (op->devices) {
1393 device = op->devices->data;
1394 crm_trace("Checking for someone to fence (%s) %s using %s",
1395 op->action, op->target, device);
1396 } else {
1397 crm_trace("Checking for someone to fence (%s) %s",
1398 op->action, op->target);
1399 }
1400
1401
1402 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1403 if (peer) {
1404 crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1405 return peer;
1406 }
1407
1408 if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1409 crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1410 return NULL;
1411 }
1412
1413
1414 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1415 if (peer) {
1416 crm_trace("Found best unverified peer %s", peer->host);
1417 return peer;
1418 }
1419
1420
1421
1422
1423 if (op->phase != st_phase_on) {
1424 peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1425 if (peer) {
1426 crm_trace("%s will fence itself", peer->host);
1427 return peer;
1428 }
1429 }
1430
1431
1432
1433
1434 } while ((op->phase != st_phase_on)
1435 && pcmk_is_set(op->call_options, st_opt_topology)
1436 && (advance_topology_level(op, false) == pcmk_rc_ok));
1437
1438 crm_notice("Couldn't find anyone to fence (%s) %s using %s",
1439 op->action, op->target, (device? device : "any device"));
1440 return NULL;
1441 }
1442
1443 static int
1444 get_device_timeout(const remote_fencing_op_t *op,
1445 const peer_device_info_t *peer, const char *device)
1446 {
1447 device_properties_t *props;
1448
1449 if (!peer || !device) {
1450 return op->base_timeout;
1451 }
1452
1453 props = g_hash_table_lookup(peer->devices, device);
1454 if (!props) {
1455 return op->base_timeout;
1456 }
1457
1458 return (props->custom_action_timeout[op->phase]?
1459 props->custom_action_timeout[op->phase] : op->base_timeout)
1460 + props->delay_max[op->phase];
1461 }
1462
/* Accumulator handed to add_device_timeout() as the g_hash_table_foreach()
 * user data when get_peer_timeout() sums the timeouts of a peer's devices
 */
1463 struct timeout_data {
1464 const remote_fencing_op_t *op;     /* operation the timeout is computed for */
1465 const peer_device_info_t *peer;    /* peer whose devices are being summed */
1466 int total_timeout;                 /* running sum of device timeouts */
1467 };
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477 static void
1478 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
1479 {
1480 const char *device_id = key;
1481 device_properties_t *props = value;
1482 struct timeout_data *timeout = user_data;
1483
1484 if (!props->executed[timeout->op->phase]
1485 && !props->disallowed[timeout->op->phase]) {
1486 timeout->total_timeout += get_device_timeout(timeout->op,
1487 timeout->peer, device_id);
1488 }
1489 }
1490
1491 static int
1492 get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
1493 {
1494 struct timeout_data timeout;
1495
1496 timeout.op = op;
1497 timeout.peer = peer;
1498 timeout.total_timeout = 0;
1499
1500 g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1501
1502 return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1503 }
1504
1505 static int
1506 get_op_total_timeout(const remote_fencing_op_t *op,
1507 const peer_device_info_t *chosen_peer)
1508 {
1509 int total_timeout = 0;
1510 stonith_topology_t *tp = find_topology_for_host(op->target);
1511
1512 if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
1513 int i;
1514 GList *device_list = NULL;
1515 GList *iter = NULL;
1516 GList *auto_list = NULL;
1517
1518 if (pcmk__str_eq(op->action, "on", pcmk__str_none)
1519 && (op->automatic_list != NULL)) {
1520 auto_list = g_list_copy(op->automatic_list);
1521 }
1522
1523
1524
1525
1526
1527
1528
1529
1530 for (i = 0; i < ST_LEVEL_MAX; i++) {
1531 if (!tp->levels[i]) {
1532 continue;
1533 }
1534 for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1535 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1536 const peer_device_info_t *peer = iter->data;
1537
1538 if (auto_list) {
1539 GList *match = g_list_find_custom(auto_list, device_list->data,
1540 sort_strings);
1541 if (match) {
1542 auto_list = g_list_remove(auto_list, match->data);
1543 }
1544 }
1545
1546 if (find_peer_device(op, peer, device_list->data,
1547 fenced_support_flag(op->action))) {
1548 total_timeout += get_device_timeout(op, peer,
1549 device_list->data);
1550 break;
1551 }
1552 }
1553 }
1554 }
1555
1556
1557 if (auto_list) {
1558 for (iter = auto_list; iter != NULL; iter = iter->next) {
1559 GList *iter2 = NULL;
1560
1561 for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) {
1562 peer_device_info_t *peer = iter2->data;
1563 if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
1564 total_timeout += get_device_timeout(op, peer, iter->data);
1565 break;
1566 }
1567 }
1568 }
1569 }
1570
1571 g_list_free(auto_list);
1572
1573 } else if (chosen_peer) {
1574 total_timeout = get_peer_timeout(op, chosen_peer);
1575 } else {
1576 total_timeout = op->base_timeout;
1577 }
1578
1579 return total_timeout ? total_timeout : op->base_timeout;
1580 }
1581
1582 static void
1583 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
1584 {
1585 GList *iter = NULL;
1586 xmlNode *update = NULL;
1587 const char *client_node = NULL;
1588 const char *client_id = NULL;
1589 const char *call_id = NULL;
1590
1591 if (op->call_options & st_opt_sync_call) {
1592
1593
1594
1595
1596 return;
1597 } else if (!op->request) {
1598 return;
1599 }
1600
1601 crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
1602 client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
1603 call_id = crm_element_value(op->request, F_STONITH_CALLID);
1604 client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
1605 if (!client_node || !call_id || !client_id) {
1606 return;
1607 }
1608
1609 if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
1610
1611 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1612 return;
1613 }
1614
1615
1616 update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1617 crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
1618 crm_xml_add(update, F_STONITH_CLIENTID, client_id);
1619 crm_xml_add(update, F_STONITH_CALLID, call_id);
1620 crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
1621
1622 send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
1623
1624 free_xml(update);
1625
1626 for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1627 remote_fencing_op_t *dup = iter->data;
1628
1629 crm_trace("Reporting timeout for duplicate %.8s to client %s",
1630 dup->id, dup->client_name);
1631 report_timeout_period(iter->data, op_timeout);
1632 }
1633 }
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643 static void
1644 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
1645 xmlNode *msg)
1646 {
1647
1648 if (op->devices) {
1649 op->devices = op->devices->next;
1650 }
1651
1652
1653 if ((op->phase == st_phase_requested)
1654 && pcmk__str_eq(op->action, "on", pcmk__str_none)) {
1655
1656 remove_required_device(op, device);
1657
1658
1659
1660
1661 if (op->devices == NULL) {
1662 op->devices = op->automatic_list;
1663 }
1664 }
1665
1666 if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1667
1668
1669
1670
1671 op_phase_on(op);
1672 }
1673
1674
1675 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1676
1677 if (op->devices) {
1678
1679 crm_trace("Next targeting %s on behalf of %s@%s",
1680 op->target, op->client_name, op->originator);
1681
1682
1683 if (op->delay > 0) {
1684 op->delay = 0;
1685 }
1686
1687 request_peer_fencing(op, NULL);
1688 } else {
1689
1690 crm_trace("Marking complex fencing op targeting %s as complete",
1691 op->target);
1692 op->state = st_done;
1693 finalize_op(op, msg, false);
1694 }
1695 }
1696
1697 static gboolean
1698 check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
1699 {
1700 if (node_does_watchdog_fencing(op->target)) {
1701
1702 crm_notice("Waiting %lds for %s to self-fence (%s) for "
1703 "client %s " CRM_XS " id=%.8s",
1704 (stonith_watchdog_timeout_ms / 1000),
1705 op->target, op->action, op->client_name, op->id);
1706 op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
1707 remote_op_watchdog_done, op);
1708 return TRUE;
1709 } else {
1710 crm_debug("Skipping fallback to watchdog-fencing as %s is "
1711 "not in host-list", op->target);
1712 }
1713 return FALSE;
1714 }
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724 static void
1725 request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
1726 {
1727 const char *device = NULL;
1728 int timeout;
1729
1730 CRM_CHECK(op != NULL, return);
1731
1732 crm_trace("Action %.8s targeting %s for %s is %s",
1733 op->id, op->target, op->client_name,
1734 stonith_op_state_str(op->state));
1735
1736 if ((op->phase == st_phase_on) && (op->devices != NULL)) {
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748 device = op->devices->data;
1749 if (pcmk__str_eq(fenced_device_reboot_action(device), "off",
1750 pcmk__str_none)) {
1751 crm_info("Not turning %s back on using %s because the device is "
1752 "configured to stay off (pcmk_reboot_action='off')",
1753 op->target, device);
1754 advance_topology_device_in_level(op, device, NULL);
1755 return;
1756 }
1757 if (!fenced_device_supports_on(device)) {
1758 crm_info("Not turning %s back on using %s because the agent "
1759 "doesn't support 'on'", op->target, device);
1760 advance_topology_device_in_level(op, device, NULL);
1761 return;
1762 }
1763 }
1764
1765 timeout = op->base_timeout;
1766 if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
1767 peer = stonith_choose_peer(op);
1768 }
1769
1770 if (!op->op_timer_total) {
1771 op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
1772 op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
1773 report_timeout_period(op, op->total_timeout);
1774 crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
1775 CRM_XS "id=%.8s",
1776 op->total_timeout, op->target, op->client_name, op->id);
1777 }
1778
1779 if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
1780
1781
1782
1783
1784
1785
1786
1787
1788 peer = stonith_choose_peer(op);
1789
1790 device = op->devices->data;
1791 timeout = get_device_timeout(op, peer, device);
1792 }
1793
1794 if (peer) {
1795 int timeout_one = 0;
1796 xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
1797
1798 crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
1799 crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
1800 crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
1801 crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
1802 crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
1803 crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
1804 crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
1805 crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
1806 crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
1807
1808 if (device) {
1809 timeout_one = TIMEOUT_MULTIPLY_FACTOR *
1810 get_device_timeout(op, peer, device);
1811 crm_notice("Requesting that %s perform '%s' action targeting %s "
1812 "using %s " CRM_XS " for client %s (%ds)",
1813 peer->host, op->action, op->target, device,
1814 op->client_name, timeout_one);
1815 crm_xml_add(remote_op, F_STONITH_DEVICE, device);
1816
1817 } else {
1818 timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
1819 crm_notice("Requesting that %s perform '%s' action targeting %s "
1820 CRM_XS " for client %s (%ds, %lds)",
1821 peer->host, op->action, op->target, op->client_name,
1822 timeout_one, stonith_watchdog_timeout_ms);
1823 }
1824
1825 op->state = st_exec;
1826 if (op->op_timer_one) {
1827 g_source_remove(op->op_timer_one);
1828 }
1829
1830 if (!((stonith_watchdog_timeout_ms > 0)
1831 && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
1832 || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
1833 && !pcmk__str_eq(op->action, "on", pcmk__str_none)))
1834 && check_watchdog_fencing_and_wait(op))) {
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853 op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
1854 }
1855
1856 send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
1857 peer->tried = TRUE;
1858 free_xml(remote_op);
1859 return;
1860
1861 } else if (op->phase == st_phase_on) {
1862
1863
1864
1865 crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
1866 "after successful 'off'", device, op->target);
1867 advance_topology_device_in_level(op, device, NULL);
1868 return;
1869
1870 } else if (op->owner == FALSE) {
1871 crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
1872 op->action, op->target, op->client_name);
1873
1874 } else if (op->query_timer == 0) {
1875
1876 crm_info("No remaining peers capable of fencing (%s) %s for client %s "
1877 CRM_XS " state=%s", op->action, op->target, op->client_name,
1878 stonith_op_state_str(op->state));
1879 CRM_CHECK(op->state < st_done, return);
1880 finalize_timed_out_op(op, "All nodes failed, or are unable, to "
1881 "fence target");
1882
1883 } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
1884
1885
1886
1887
1888 if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
1889 STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
1890 if (check_watchdog_fencing_and_wait(op)) {
1891 return;
1892 }
1893 }
1894
1895 if (op->state == st_query) {
1896 crm_info("No peers (out of %d) have devices capable of fencing "
1897 "(%s) %s for client %s " CRM_XS " state=%s",
1898 op->replies, op->action, op->target, op->client_name,
1899 stonith_op_state_str(op->state));
1900
1901 pcmk__reset_result(&op->result);
1902 pcmk__set_result(&op->result, CRM_EX_ERROR,
1903 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
1904 } else {
1905 if (pcmk_is_set(op->call_options, st_opt_topology)) {
1906 pcmk__reset_result(&op->result);
1907 pcmk__set_result(&op->result, CRM_EX_ERROR,
1908 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
1909 }
1910
1911
1912
1913
1914
1915
1916
1917 crm_info("No peers (out of %d) are capable of fencing (%s) %s "
1918 "for client %s " CRM_XS " state=%s",
1919 op->replies, op->action, op->target, op->client_name,
1920 stonith_op_state_str(op->state));
1921 }
1922
1923 op->state = st_failed;
1924 finalize_op(op, NULL, false);
1925
1926 } else {
1927 crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
1928 "for client %s " CRM_XS " id=%.8s",
1929 op->action, op->target, (device? " using " : ""),
1930 (device? device : ""), op->client_name, op->id);
1931 }
1932 }
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945 static gint
1946 sort_peers(gconstpointer a, gconstpointer b)
1947 {
1948 const peer_device_info_t *peer_a = a;
1949 const peer_device_info_t *peer_b = b;
1950
1951 return (peer_b->ndevices - peer_a->ndevices);
1952 }
1953
1954
1955
1956
1957
1958
1959
1960 static gboolean
1961 all_topology_devices_found(const remote_fencing_op_t *op)
1962 {
1963 GList *device = NULL;
1964 GList *iter = NULL;
1965 device_properties_t *match = NULL;
1966 stonith_topology_t *tp = NULL;
1967 gboolean skip_target = FALSE;
1968 int i;
1969
1970 tp = find_topology_for_host(op->target);
1971 if (!tp) {
1972 return FALSE;
1973 }
1974 if (pcmk__is_fencing_action(op->action)) {
1975
1976
1977 skip_target = TRUE;
1978 }
1979
1980 for (i = 0; i < ST_LEVEL_MAX; i++) {
1981 for (device = tp->levels[i]; device; device = device->next) {
1982 match = NULL;
1983 for (iter = op->query_results; iter && !match; iter = iter->next) {
1984 peer_device_info_t *peer = iter->data;
1985
1986 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1987 continue;
1988 }
1989 match = find_peer_device(op, peer, device->data, st_device_supports_none);
1990 }
1991 if (!match) {
1992 return FALSE;
1993 }
1994 }
1995 }
1996
1997 return TRUE;
1998 }
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012 static void
2013 parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
2014 const char *action, remote_fencing_op_t *op,
2015 enum st_remap_phase phase, device_properties_t *props)
2016 {
2017 props->custom_action_timeout[phase] = 0;
2018 crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
2019 &props->custom_action_timeout[phase]);
2020 if (props->custom_action_timeout[phase]) {
2021 crm_trace("Peer %s with device %s returned %s action timeout %d",
2022 peer, device, action, props->custom_action_timeout[phase]);
2023 }
2024
2025 props->delay_max[phase] = 0;
2026 crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
2027 if (props->delay_max[phase]) {
2028 crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
2029 peer, device, props->delay_max[phase], action);
2030 }
2031
2032 props->delay_base[phase] = 0;
2033 crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
2034 if (props->delay_base[phase]) {
2035 crm_trace("Peer %s with device %s returned base delay %d for %s",
2036 peer, device, props->delay_base[phase], action);
2037 }
2038
2039
2040 if (pcmk__str_eq(action, "on", pcmk__str_none)) {
2041 int required = 0;
2042
2043 crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
2044 if (required) {
2045 crm_trace("Peer %s requires device %s to execute for action %s",
2046 peer, device, action);
2047 add_required_device(op, device);
2048 }
2049 }
2050
2051
2052
2053
2054 if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) {
2055 props->disallowed[phase] = TRUE;
2056 crm_trace("Peer %s is disallowed from executing %s for device %s",
2057 peer, action, device);
2058 }
2059 }
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070 static void
2071 add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
2072 peer_device_info_t *peer, const char *device)
2073 {
2074 xmlNode *child;
2075 int verified = 0;
2076 device_properties_t *props = calloc(1, sizeof(device_properties_t));
2077 int flags = st_device_supports_on;
2078
2079
2080 CRM_ASSERT(props != NULL);
2081 g_hash_table_insert(peer->devices, strdup(device), props);
2082
2083
2084 crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
2085 if (verified) {
2086 crm_trace("Peer %s has confirmed a verified device %s",
2087 peer->host, device);
2088 props->verified = TRUE;
2089 }
2090
2091 crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags);
2092 props->device_support_flags = flags;
2093
2094
2095 parse_action_specific(xml, peer->host, device, op_requested_action(op),
2096 op, st_phase_requested, props);
2097 for (child = pcmk__xml_first_child(xml); child != NULL;
2098 child = pcmk__xml_next(child)) {
2099
2100
2101
2102
2103 if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) {
2104 parse_action_specific(child, peer->host, device, "off",
2105 op, st_phase_off, props);
2106 } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) {
2107 parse_action_specific(child, peer->host, device, "on",
2108 op, st_phase_on, props);
2109 }
2110 }
2111 }
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124 static peer_device_info_t *
2125 add_result(remote_fencing_op_t *op, const char *host, int ndevices,
2126 const xmlNode *xml)
2127 {
2128 peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
2129 xmlNode *child;
2130
2131
2132
2133 CRM_CHECK(peer != NULL, return NULL);
2134 peer->host = strdup(host);
2135 peer->devices = pcmk__strkey_table(free, free);
2136
2137
2138 for (child = pcmk__xml_first_child(xml); child != NULL;
2139 child = pcmk__xml_next(child)) {
2140 const char *device = ID(child);
2141
2142 if (device) {
2143 add_device_properties(child, op, peer, device);
2144 }
2145 }
2146
2147 peer->ndevices = g_hash_table_size(peer->devices);
2148 CRM_CHECK(ndevices == peer->ndevices,
2149 crm_err("Query claimed to have %d device%s but %d found",
2150 ndevices, pcmk__plural_s(ndevices), peer->ndevices));
2151
2152 op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
2153 return peer;
2154 }
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170 int
2171 process_remote_stonith_query(xmlNode *msg)
2172 {
2173 int ndevices = 0;
2174 gboolean host_is_target = FALSE;
2175 gboolean have_all_replies = FALSE;
2176 const char *id = NULL;
2177 const char *host = NULL;
2178 remote_fencing_op_t *op = NULL;
2179 peer_device_info_t *peer = NULL;
2180 uint32_t replies_expected;
2181 xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
2182
2183 CRM_CHECK(dev != NULL, return -EPROTO);
2184
2185 id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
2186 CRM_CHECK(id != NULL, return -EPROTO);
2187
2188 dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
2189 CRM_CHECK(dev != NULL, return -EPROTO);
2190 crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
2191
2192 op = g_hash_table_lookup(stonith_remote_op_list, id);
2193 if (op == NULL) {
2194 crm_debug("Received query reply for unknown or expired operation %s",
2195 id);
2196 return -EOPNOTSUPP;
2197 }
2198
2199 replies_expected = fencing_active_peers();
2200 if (op->replies_expected < replies_expected) {
2201 replies_expected = op->replies_expected;
2202 }
2203 if ((++op->replies >= replies_expected) && (op->state == st_query)) {
2204 have_all_replies = TRUE;
2205 }
2206 host = crm_element_value(msg, F_ORIG);
2207 host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
2208
2209 crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
2210 op->replies, replies_expected, host,
2211 op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
2212 if (ndevices > 0) {
2213 peer = add_result(op, host, ndevices, dev);
2214 }
2215
2216 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2217
2218 if (pcmk_is_set(op->call_options, st_opt_topology)) {
2219
2220
2221
2222 if (op->state == st_query && all_topology_devices_found(op)) {
2223
2224 crm_trace("All topology devices found");
2225 request_peer_fencing(op, peer);
2226
2227 } else if (have_all_replies) {
2228 crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
2229 replies_expected, op->replies);
2230 request_peer_fencing(op, NULL);
2231 }
2232
2233 } else if (op->state == st_query) {
2234 int nverified = count_peer_devices(op, peer, TRUE,
2235 fenced_support_flag(op->action));
2236
2237
2238
2239 if ((peer != NULL) && !host_is_target && nverified) {
2240
2241 crm_trace("Found %d verified device%s",
2242 nverified, pcmk__plural_s(nverified));
2243 request_peer_fencing(op, peer);
2244
2245 } else if (have_all_replies) {
2246 crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
2247 replies_expected, op->replies);
2248 request_peer_fencing(op, NULL);
2249
2250 } else {
2251 crm_trace("Waiting for more peer results before launching fencing operation");
2252 }
2253
2254 } else if ((peer != NULL) && (op->state == st_done)) {
2255 crm_info("Discarding query result from %s (%d device%s): "
2256 "Operation is %s", peer->host,
2257 peer->ndevices, pcmk__plural_s(peer->ndevices),
2258 stonith_op_state_str(op->state));
2259 }
2260
2261 return pcmk_ok;
2262 }
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273 void
2274 fenced_process_fencing_reply(xmlNode *msg)
2275 {
2276 const char *id = NULL;
2277 const char *device = NULL;
2278 remote_fencing_op_t *op = NULL;
2279 xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
2280 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2281
2282 CRM_CHECK(dev != NULL, return);
2283
2284 id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
2285 CRM_CHECK(id != NULL, return);
2286
2287 dev = stonith__find_xe_with_result(msg);
2288 CRM_CHECK(dev != NULL, return);
2289
2290 stonith__xe_get_result(dev, &result);
2291
2292 device = crm_element_value(dev, F_STONITH_DEVICE);
2293
2294 if (stonith_remote_op_list) {
2295 op = g_hash_table_lookup(stonith_remote_op_list, id);
2296 }
2297
2298 if ((op == NULL) && pcmk__result_ok(&result)) {
2299
2300 const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
2301
2302 op = create_remote_stonith_op(client_id, dev, TRUE);
2303 }
2304
2305 if (op == NULL) {
2306
2307
2308 crm_info("Received peer result of unknown or expired operation %s", id);
2309 pcmk__reset_result(&result);
2310 return;
2311 }
2312
2313 pcmk__reset_result(&op->result);
2314 op->result = result;
2315
2316 if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
2317 crm_err("Received outdated reply for device %s (instead of %s) to "
2318 "fence (%s) %s. Operation already timed out at peer level.",
2319 device, (const char *) op->devices->data, op->action, op->target);
2320 return;
2321 }
2322
2323 if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
2324 if (pcmk__result_ok(&op->result)) {
2325 op->state = st_done;
2326 } else {
2327 op->state = st_failed;
2328 }
2329 finalize_op(op, msg, false);
2330 return;
2331
2332 } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
2333
2334
2335 crm_err("Received non-broadcast fencing result for operation %.8s "
2336 "we do not own (device %s targeting %s)",
2337 op->id, device, op->target);
2338 return;
2339 }
2340
2341 if (pcmk_is_set(op->call_options, st_opt_topology)) {
2342 const char *device = NULL;
2343 const char *reason = op->result.exit_reason;
2344
2345
2346
2347 if (op->state == st_done) {
2348 finalize_op(op, msg, false);
2349 return;
2350 }
2351
2352 device = crm_element_value(msg, F_STONITH_DEVICE);
2353
2354 if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
2355
2356
2357
2358 crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
2359 "after successful 'off'",
2360 device, pcmk_exec_status_str(op->result.execution_status),
2361 (reason == NULL)? "" : ": ",
2362 (reason == NULL)? "" : reason,
2363 op->target);
2364 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2365 } else {
2366 crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
2367 "%s%s%s%s",
2368 op->action, op->target,
2369 ((device == NULL)? "" : " using "),
2370 ((device == NULL)? "" : device),
2371 op->client_name,
2372 op->originator,
2373 pcmk_exec_status_str(op->result.execution_status),
2374 (reason == NULL)? "" : " (",
2375 (reason == NULL)? "" : reason,
2376 (reason == NULL)? "" : ")");
2377 }
2378
2379 if (pcmk__result_ok(&op->result)) {
2380
2381
2382 advance_topology_device_in_level(op, device, msg);
2383 return;
2384 } else {
2385
2386
2387 if (advance_topology_level(op, false) != pcmk_rc_ok) {
2388 op->state = st_failed;
2389 finalize_op(op, msg, false);
2390 return;
2391 }
2392 }
2393
2394 } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
2395 op->state = st_done;
2396 finalize_op(op, msg, false);
2397 return;
2398
2399 } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
2400 && (op->devices == NULL)) {
2401
2402 op->state = st_failed;
2403 finalize_op(op, msg, false);
2404 return;
2405
2406 } else {
2407
2408 }
2409
2410
2411 crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
2412 op->target, op->originator, op->client_name,
2413 pcmk_exec_status_str(op->result.execution_status));
2414 request_peer_fencing(op, NULL);
2415 }
2416
2417 gboolean
2418 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
2419 {
2420 GHashTableIter iter;
2421 time_t now = time(NULL);
2422 remote_fencing_op_t *rop = NULL;
2423
2424 if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2425 action == NULL) {
2426 return FALSE;
2427 }
2428
2429 g_hash_table_iter_init(&iter, stonith_remote_op_list);
2430 while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2431 if (strcmp(rop->target, target) != 0) {
2432 continue;
2433 } else if (rop->state != st_done) {
2434 continue;
2435
2436
2437
2438 } else if (strcmp(rop->action, action) != 0) {
2439 continue;
2440 } else if ((rop->completed + tolerance) < now) {
2441 continue;
2442 }
2443
2444 crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2445 target, action, tolerance, rop->delegate, rop->originator);
2446 return TRUE;
2447 }
2448 return FALSE;
2449 }