This source file includes the following definitions.
- sort_strings
- free_remote_query
- free_stonith_remote_op_list
- count_peer_device
- count_peer_devices
- find_peer_device
- grab_peer_device
- clear_remote_op_timers
- free_remote_op
- init_stonith_remote_op_hash_table
- op_requested_action
- op_phase_off
- op_phase_on
- undo_op_remap
- fencing_result2xml
- fenced_broadcast_op_result
- handle_local_reply_and_notify
- finalize_op_duplicates
- delegate_from_xml
- finalize_op
- remote_op_watchdog_done
- remote_op_timeout_one
- finalize_timed_out_op
- remote_op_timeout
- remote_op_query_timeout
- topology_is_empty
- add_required_device
- remove_required_device
- set_op_device_list
- topology_matches
- find_topology_for_host
- advance_topology_level
- merge_duplicates
- fencing_active_peers
- fenced_handle_manual_confirmation
- create_remote_stonith_op
- initiate_remote_stonith_op
- find_best_peer
- stonith_choose_peer
- get_device_timeout
- add_device_timeout
- get_peer_timeout
- get_op_total_timeout
- report_timeout_period
- advance_topology_device_in_level
- check_watchdog_fencing_and_wait
- request_peer_fencing
- sort_peers
- all_topology_devices_found
- parse_action_specific
- add_device_properties
- add_result
- process_remote_stonith_query
- fenced_process_fencing_reply
- stonith_check_fence_tolerance
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdio.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <sys/utsname.h>
19
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <ctype.h>
24 #include <regex.h>
25
26 #include <crm/crm.h>
27 #include <crm/msg_xml.h>
28 #include <crm/common/ipc.h>
29 #include <crm/common/ipc_internal.h>
30 #include <crm/cluster/internal.h>
31
32 #include <crm/stonith-ng.h>
33 #include <crm/fencing/internal.h>
34 #include <crm/common/xml.h>
35 #include <crm/common/xml_internal.h>
36
37 #include <crm/common/util.h>
38 #include <pacemaker-fenced.h>
39
/* Safety margin applied to timeouts in this file: the device-query timer in
 * initiate_remote_stonith_op() and the estimated total timeout in
 * merge_duplicates() are both multiplied by this factor.
 */
40 #define TIMEOUT_MULTIPLY_FACTOR 1.2
41
42
43
44
45
46
47
/* Properties recorded for a single fence device as reported by one peer.
 * Arrays are indexed by operation phase (st_phase_max entries).
 */
48 typedef struct device_properties_s {
49 
/* Checked by count_peer_device() and grab_peer_device() when the caller
 * asks for verified devices only
 */
50 gboolean verified;
51 
52 
53 
54 
/* Per-phase flag: grab_peer_device() sets the entry for the operation's
 * current phase; find_peer_device() and count_peer_device() then skip the
 * device for that phase
 */
55 gboolean executed[st_phase_max];
56 
/* Per-phase flag: find_peer_device() never returns a device whose entry for
 * the operation's current phase is set
 */
57 gboolean disallowed[st_phase_max];
58 
/* NOTE(review): presumably a per-phase action timeout override; not used in
 * the visible part of this file -- confirm against add_device_properties()
 */
59 int custom_action_timeout[st_phase_max];
60 
/* NOTE(review): presumably the per-phase maximum delay -- confirm */
61 int delay_max[st_phase_max];
62 
/* NOTE(review): presumably the per-phase base delay -- confirm */
63 int delay_base[st_phase_max];
64 
/* Bit flags tested with pcmk_is_set() against a requested support flag in
 * count_peer_device() and find_peer_device()
 */
65 uint32_t device_support_flags;
66 } device_properties_t;
67
/* One peer's reply to a device query; freed by free_remote_query() */
68 typedef struct {
69 
/* Peer name used in log messages; heap-allocated (released with free()) */
70 char *host;
71 
/* NOTE(review): presumably whether this peer has already been asked to fence
 * -- not used in the visible part of this file, confirm
 */
72 gboolean tried;
73 
/* NOTE(review): presumably the number of devices the peer reported -- confirm */
74 int ndevices;
75 
/* Looked up by device name in find_peer_device(); values are
 * device_properties_t pointers. Destroyed with g_hash_table_destroy().
 */
76 GHashTable *devices;
77 } peer_device_info_t;
78
/* Table of remote fencing operations keyed by operation ID. Created lazily by
 * init_stonith_remote_op_hash_table() with free_remote_op() as the value
 * destructor, populated by create_remote_stonith_op().
 */
79 GHashTable *stonith_remote_op_list = NULL;
80 
/* Defined outside this file; used by initiate_remote_stonith_op() to build
 * the device query message
 */
81 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
82 int call_options);
83 
/* Forward declarations for functions used before their definitions */
84 static void request_peer_fencing(remote_fencing_op_t *op,
85 peer_device_info_t *peer);
86 static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
87 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
88 static int get_op_total_timeout(const remote_fencing_op_t *op,
89 const peer_device_info_t *chosen_peer);
90
91 static gint
92 sort_strings(gconstpointer a, gconstpointer b)
93 {
94 return strcmp(a, b);
95 }
96
97 static void
98 free_remote_query(gpointer data)
99 {
100 if (data != NULL) {
101 peer_device_info_t *peer = data;
102
103 g_hash_table_destroy(peer->devices);
104 free(peer->host);
105 free(peer);
106 }
107 }
108
109 void
110 free_stonith_remote_op_list(void)
111 {
112 if (stonith_remote_op_list != NULL) {
113 g_hash_table_destroy(stonith_remote_op_list);
114 stonith_remote_op_list = NULL;
115 }
116 }
117
/* Accumulator passed as user data to count_peer_device() */
118 struct peer_count_data {
/* Operation whose current phase selects the executed[] entry to check */
119 const remote_fencing_op_t *op;
/* If true, only devices with the verified flag set are counted */
120 gboolean verified_only;
/* Required support flag(s), or st_device_supports_none for no restriction */
121 uint32_t support_action_only;
/* Running number of matching devices */
122 int count;
123 };
124
125
126
127
128
129
130
131
132
133 static void
134 count_peer_device(gpointer key, gpointer value, gpointer user_data)
135 {
136 device_properties_t *props = (device_properties_t*)value;
137 struct peer_count_data *data = user_data;
138
139 if (!props->executed[data->op->phase]
140 && (!data->verified_only || props->verified)
141 && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
142 ++(data->count);
143 }
144 }
145
146
147
148
149
150
151
152
153
154
155
156
157 static int
158 count_peer_devices(const remote_fencing_op_t *op,
159 const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
160 {
161 struct peer_count_data data;
162
163 data.op = op;
164 data.verified_only = verified_only;
165 data.support_action_only = support_on_action_only;
166 data.count = 0;
167 if (peer) {
168 g_hash_table_foreach(peer->devices, count_peer_device, &data);
169 }
170 return data.count;
171 }
172
173
174
175
176
177
178
179
180
181
182
183 static device_properties_t *
184 find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
185 const char *device, uint32_t support_action_only)
186 {
187 device_properties_t *props = g_hash_table_lookup(peer->devices, device);
188
189 if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
190 return NULL;
191 }
192 return (props && !props->executed[op->phase]
193 && !props->disallowed[op->phase])? props : NULL;
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207 static gboolean
208 grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
209 const char *device, gboolean verified_devices_only)
210 {
211 device_properties_t *props = find_peer_device(op, peer, device,
212 fenced_support_flag(op->action));
213
214 if ((props == NULL) || (verified_devices_only && !props->verified)) {
215 return FALSE;
216 }
217
218 crm_trace("Removing %s from %s (%d remaining)",
219 device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
220 props->executed[op->phase] = TRUE;
221 return TRUE;
222 }
223
224 static void
225 clear_remote_op_timers(remote_fencing_op_t * op)
226 {
227 if (op->query_timer) {
228 g_source_remove(op->query_timer);
229 op->query_timer = 0;
230 }
231 if (op->op_timer_total) {
232 g_source_remove(op->op_timer_total);
233 op->op_timer_total = 0;
234 }
235 if (op->op_timer_one) {
236 g_source_remove(op->op_timer_one);
237 op->op_timer_one = 0;
238 }
239 }
240
241 static void
242 free_remote_op(gpointer data)
243 {
244 remote_fencing_op_t *op = data;
245
246 crm_log_xml_debug(op->request, "Destroying");
247
248 clear_remote_op_timers(op);
249
250 free(op->id);
251 free(op->action);
252 free(op->delegate);
253 free(op->target);
254 free(op->client_id);
255 free(op->client_name);
256 free(op->originator);
257
258 if (op->query_results) {
259 g_list_free_full(op->query_results, free_remote_query);
260 }
261 if (op->request) {
262 free_xml(op->request);
263 op->request = NULL;
264 }
265 if (op->devices_list) {
266 g_list_free_full(op->devices_list, free);
267 op->devices_list = NULL;
268 }
269 g_list_free_full(op->automatic_list, free);
270 g_list_free(op->duplicates);
271
272 pcmk__reset_result(&op->result);
273 free(op);
274 }
275
276 void
277 init_stonith_remote_op_hash_table(GHashTable **table)
278 {
279 if (*table == NULL) {
280 *table = pcmk__strkey_table(NULL, free_remote_op);
281 }
282 }
283
284
285
286
287
288
289
290
291
292 static const char *
293 op_requested_action(const remote_fencing_op_t *op)
294 {
295 return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action);
296 }
297
298
299
300
301
302
303
304 static void
305 op_phase_off(remote_fencing_op_t *op)
306 {
307 crm_info("Remapping multiple-device reboot targeting %s to 'off' "
308 CRM_XS " id=%.8s", op->target, op->id);
309 op->phase = st_phase_off;
310
311
312
313
314 strcpy(op->action, PCMK_ACTION_OFF);
315 }
316
317
318
319
320
321
322
323 static void
324 op_phase_on(remote_fencing_op_t *op)
325 {
326 GList *iter = NULL;
327
328 crm_info("Remapped 'off' targeting %s complete, "
329 "remapping to 'on' for %s " CRM_XS " id=%.8s",
330 op->target, op->client_name, op->id);
331 op->phase = st_phase_on;
332 strcpy(op->action, PCMK_ACTION_ON);
333
334
335
336
337 for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
338 GList *match = g_list_find_custom(op->devices_list, iter->data,
339 sort_strings);
340
341 if (match) {
342 op->devices_list = g_list_remove(op->devices_list, match->data);
343 }
344 }
345 g_list_free_full(op->automatic_list, free);
346 op->automatic_list = NULL;
347
348
349 op->devices = op->devices_list;
350 }
351
352
353
354
355
356
357
358 static void
359 undo_op_remap(remote_fencing_op_t *op)
360 {
361 if (op->phase > 0) {
362 crm_info("Undoing remap of reboot targeting %s for %s "
363 CRM_XS " id=%.8s", op->target, op->client_name, op->id);
364 op->phase = st_phase_requested;
365 strcpy(op->action, PCMK_ACTION_REBOOT);
366 }
367 }
368
369
370
371
372
373
374
375
376
377
378 static xmlNode *
379 fencing_result2xml(const remote_fencing_op_t *op)
380 {
381 xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
382
383 crm_xml_add_int(notify_data, "state", op->state);
384 crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
385 crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
386 crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
387 crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
388 crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
389 crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
390 crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
391
392 return notify_data;
393 }
394
395
396
397
398
399
400
401
402 void
403 fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
404 {
405 static int count = 0;
406 xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
407 xmlNode *notify_data = fencing_result2xml(op);
408
409 count++;
410 crm_trace("Broadcasting result to peers");
411 crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
412 crm_xml_add(bcast, F_SUBTYPE, "broadcast");
413 crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
414 crm_xml_add_int(bcast, "count", count);
415
416 if (op_merged) {
417 pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true);
418 }
419
420 stonith__xe_set_result(notify_data, &op->result);
421
422 add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
423 send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
424 free_xml(notify_data);
425 free_xml(bcast);
426
427 return;
428 }
429
430
431
432
433
434
435
436
437 static void
438 handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
439 {
440 xmlNode *notify_data = NULL;
441 xmlNode *reply = NULL;
442 pcmk__client_t *client = NULL;
443
444 if (op->notify_sent == TRUE) {
445
446 return;
447 }
448
449
450 crm_xml_add_int(data, "state", op->state);
451 crm_xml_add(data, F_STONITH_TARGET, op->target);
452 crm_xml_add(data, F_STONITH_OPERATION, op->action);
453
454 reply = fenced_construct_reply(op->request, data, &op->result);
455 crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
456
457
458 client = pcmk__find_client_by_id(op->client_id);
459 if (client == NULL) {
460 crm_trace("Skipping reply to %s: no longer a client", op->client_id);
461 } else {
462 do_local_reply(reply, client, op->call_options);
463 }
464
465
466 notify_data = fencing_result2xml(op);
467 fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
468 free_xml(notify_data);
469 fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
470
471
472 op->notify_sent = TRUE;
473 free_xml(reply);
474 }
475
476
477
478
479
480
481
482
483 static void
484 finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
485 {
486 for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
487 remote_fencing_op_t *other = iter->data;
488
489 if (other->state == st_duplicate) {
490 other->state = op->state;
491 crm_debug("Performing duplicate notification for %s@%s: %s "
492 CRM_XS " id=%.8s",
493 other->client_name, other->originator,
494 pcmk_exec_status_str(op->result.execution_status),
495 other->id);
496 pcmk__copy_result(&op->result, &other->result);
497 finalize_op(other, data, true);
498
499 } else {
500
501 crm_err("Skipping duplicate notification for %s@%s "
502 CRM_XS " state=%s id=%.8s",
503 other->client_name, other->originator,
504 stonith_op_state_str(other->state), other->id);
505 }
506 }
507 }
508
509 static char *
510 delegate_from_xml(xmlNode *xml)
511 {
512 xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER);
513
514 if (match == NULL) {
515 return crm_element_value_copy(xml, F_ORIG);
516 } else {
517 return crm_element_value_copy(match, F_STONITH_DELEGATE);
518 }
519 }
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537 static void
538 finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
539 {
540 int level = LOG_ERR;
541 const char *subt = NULL;
542 xmlNode *local_data = NULL;
543 gboolean op_merged = FALSE;
544
545 CRM_CHECK((op != NULL), return);
546
547
548 clear_remote_op_timers(op);
549
550 if (op->notify_sent) {
551
552 crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
553 "Result arrived too late " CRM_XS " id=%.8s",
554 op->action, (op->target? " targeting " : ""),
555 (op->target? op->target : ""),
556 (op->delegate? op->delegate : "unknown node"),
557 op->client_name, op->originator,
558 (op_merged? " (merged)" : ""),
559 op->id);
560 return;
561 }
562
563 set_fencing_completed(op);
564 undo_op_remap(op);
565
566 if (data == NULL) {
567 data = create_xml_node(NULL, "remote-op");
568 local_data = data;
569
570 } else if (op->delegate == NULL) {
571 switch (op->result.execution_status) {
572 case PCMK_EXEC_NO_FENCE_DEVICE:
573 break;
574
575 case PCMK_EXEC_INVALID:
576 if (op->result.exit_status != CRM_EX_EXPIRED) {
577 op->delegate = delegate_from_xml(data);
578 }
579 break;
580
581 default:
582 op->delegate = delegate_from_xml(data);
583 break;
584 }
585 }
586
587 if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
588 op_merged = true;
589 }
590
591
592
593
594 subt = crm_element_value(data, F_SUBTYPE);
595 if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
596
597 fenced_broadcast_op_result(op, op_merged);
598 free_xml(local_data);
599 return;
600 }
601
602 if (pcmk__result_ok(&op->result) || dup
603 || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
604 level = LOG_NOTICE;
605 }
606 do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
607 CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
608 (op->target? op->target : ""),
609 (op->delegate? op->delegate : "unknown node"),
610 op->client_name, op->originator,
611 (op_merged? " (merged)" : ""),
612 crm_exit_str(op->result.exit_status),
613 pcmk_exec_status_str(op->result.execution_status),
614 ((op->result.exit_reason == NULL)? "" : ": "),
615 ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
616 op->id);
617
618 handle_local_reply_and_notify(op, data);
619
620 if (!dup) {
621 finalize_op_duplicates(op, data);
622 }
623
624
625
626
627 if (op->query_results) {
628 g_list_free_full(op->query_results, free_remote_query);
629 op->query_results = NULL;
630 }
631 if (op->request) {
632 free_xml(op->request);
633 op->request = NULL;
634 }
635
636 free_xml(local_data);
637 }
638
639
640
641
642
643
644
645
646
647 static gboolean
648 remote_op_watchdog_done(gpointer userdata)
649 {
650 remote_fencing_op_t *op = userdata;
651
652 op->op_timer_one = 0;
653
654 crm_notice("Self-fencing (%s) by %s for %s assumed complete "
655 CRM_XS " id=%.8s",
656 op->action, op->target, op->client_name, op->id);
657 op->state = st_done;
658 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
659 finalize_op(op, NULL, false);
660 return G_SOURCE_REMOVE;
661 }
662
663 static gboolean
664 remote_op_timeout_one(gpointer userdata)
665 {
666 remote_fencing_op_t *op = userdata;
667
668 op->op_timer_one = 0;
669
670 crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
671 " id=%.8s", op->action, op->target, op->client_name, op->id);
672 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
673 "Peer did not return fence result within timeout");
674
675
676 if (op->client_delay > 0) {
677 op->client_delay = 0;
678 crm_trace("Try another device for '%s' action targeting %s "
679 "for client %s without delay " CRM_XS " id=%.8s",
680 op->action, op->target, op->client_name, op->id);
681 }
682
683
684 request_peer_fencing(op, NULL);
685 return G_SOURCE_REMOVE;
686 }
687
688
689
690
691
692
693
694
695 static void
696 finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
697 {
698 crm_debug("Action '%s' targeting %s for client %s timed out "
699 CRM_XS " id=%.8s",
700 op->action, op->target, op->client_name, op->id);
701
702 if (op->phase == st_phase_on) {
703
704
705
706
707 op->state = st_done;
708 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
709 } else {
710 op->state = st_failed;
711 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
712 }
713 finalize_op(op, NULL, false);
714 }
715
716
717
718
719
720
721
722
723
724 static gboolean
725 remote_op_timeout(gpointer userdata)
726 {
727 remote_fencing_op_t *op = userdata;
728
729 op->op_timer_total = 0;
730
731 if (op->state == st_done) {
732 crm_debug("Action '%s' targeting %s for client %s already completed "
733 CRM_XS " id=%.8s",
734 op->action, op->target, op->client_name, op->id);
735 } else {
736 finalize_timed_out_op(userdata, "Fencing did not complete within a "
737 "total timeout based on the "
738 "configured timeout and retries for "
739 "any devices attempted");
740 }
741 return G_SOURCE_REMOVE;
742 }
743
744 static gboolean
745 remote_op_query_timeout(gpointer data)
746 {
747 remote_fencing_op_t *op = data;
748
749 op->query_timer = 0;
750
751 if (op->state == st_done) {
752 crm_debug("Operation %.8s targeting %s already completed",
753 op->id, op->target);
754 } else if (op->state == st_exec) {
755 crm_debug("Operation %.8s targeting %s already in progress",
756 op->id, op->target);
757 } else if (op->query_results) {
758
759 crm_debug("Query %.8s targeting %s complete (state=%s)",
760 op->id, op->target, stonith_op_state_str(op->state));
761 request_peer_fencing(op, NULL);
762 } else {
763 crm_debug("Query %.8s targeting %s timed out (state=%s)",
764 op->id, op->target, stonith_op_state_str(op->state));
765 finalize_timed_out_op(op, "No capable peers replied to device query "
766 "within timeout");
767 }
768
769 return G_SOURCE_REMOVE;
770 }
771
772 static gboolean
773 topology_is_empty(stonith_topology_t *tp)
774 {
775 int i;
776
777 if (tp == NULL) {
778 return TRUE;
779 }
780
781 for (i = 0; i < ST_LEVEL_MAX; i++) {
782 if (tp->levels[i] != NULL) {
783 return FALSE;
784 }
785 }
786 return TRUE;
787 }
788
789
790
791
792
793
794
795
796 static void
797 add_required_device(remote_fencing_op_t *op, const char *device)
798 {
799 GList *match = g_list_find_custom(op->automatic_list, device,
800 sort_strings);
801
802 if (!match) {
803 op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
804 }
805 }
806
807
808
809
810
811
812
813
814 static void
815 remove_required_device(remote_fencing_op_t *op, const char *device)
816 {
817 GList *match = g_list_find_custom(op->automatic_list, device,
818 sort_strings);
819
820 if (match) {
821 op->automatic_list = g_list_remove(op->automatic_list, match->data);
822 }
823 }
824
825
826 static void
827 set_op_device_list(remote_fencing_op_t * op, GList *devices)
828 {
829 GList *lpc = NULL;
830
831 if (op->devices_list) {
832 g_list_free_full(op->devices_list, free);
833 op->devices_list = NULL;
834 }
835 for (lpc = devices; lpc != NULL; lpc = lpc->next) {
836 op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
837 }
838 op->devices = op->devices_list;
839 }
840
841
842
843
844
845
846
847
848
849
850 static gboolean
851 topology_matches(const stonith_topology_t *tp, const char *node)
852 {
853 regex_t r_patt;
854
855 CRM_CHECK(node && tp && tp->target, return FALSE);
856 switch (tp->kind) {
857 case fenced_target_by_attribute:
858
859
860
861
862
863
864 if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
865 crm_notice("Matched %s with %s by attribute", node, tp->target);
866 return TRUE;
867 }
868 break;
869
870 case fenced_target_by_pattern:
871
872
873
874 if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
875 crm_info("Bad regex '%s' for fencing level", tp->target);
876 } else {
877 int status = regexec(&r_patt, node, 0, NULL, 0);
878
879 regfree(&r_patt);
880 if (status == 0) {
881 crm_notice("Matched %s with %s by name", node, tp->target);
882 return TRUE;
883 }
884 }
885 break;
886
887 case fenced_target_by_name:
888 crm_trace("Testing %s against %s", node, tp->target);
889 return pcmk__str_eq(tp->target, node, pcmk__str_casei);
890
891 default:
892 break;
893 }
894 crm_trace("No match for %s with %s", node, tp->target);
895 return FALSE;
896 }
897
898 stonith_topology_t *
899 find_topology_for_host(const char *host)
900 {
901 GHashTableIter tIter;
902 stonith_topology_t *tp = g_hash_table_lookup(topology, host);
903
904 if(tp != NULL) {
905 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
906 return tp;
907 }
908
909 g_hash_table_iter_init(&tIter, topology);
910 while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
911 if (topology_matches(tp, host)) {
912 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
913 return tp;
914 }
915 }
916
917 crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
918 return NULL;
919 }
920
921
922
923
924
925
926
927
928
929
930
931
932 static int
933 advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
934 {
935 stonith_topology_t *tp = NULL;
936
937 if (op->target) {
938 tp = find_topology_for_host(op->target);
939 }
940 if (topology_is_empty(tp)) {
941 return empty_ok? pcmk_rc_ok : ENODEV;
942 }
943
944 CRM_ASSERT(tp->levels != NULL);
945
946 stonith__set_call_options(op->call_options, op->id, st_opt_topology);
947
948
949 undo_op_remap(op);
950
951 do {
952 op->level++;
953
954 } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
955
956 if (op->level < ST_LEVEL_MAX) {
957 crm_trace("Attempting fencing level %d targeting %s (%d devices) "
958 "for client %s@%s (id=%.8s)",
959 op->level, op->target, g_list_length(tp->levels[op->level]),
960 op->client_name, op->originator, op->id);
961 set_op_device_list(op, tp->levels[op->level]);
962
963
964 if ((op->level > 1) && (op->client_delay > 0)) {
965 op->client_delay = 0;
966 }
967
968 if ((g_list_next(op->devices_list) != NULL)
969 && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
970
971
972
973
974
975 op_phase_off(op);
976 }
977 return pcmk_rc_ok;
978 }
979
980 crm_info("All %sfencing options targeting %s for client %s@%s failed "
981 CRM_XS " id=%.8s",
982 (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
983 op->target, op->client_name, op->originator, op->id);
984 return ENODEV;
985 }
986
987
988
989
990
991
992
993 static void
994 merge_duplicates(remote_fencing_op_t *op)
995 {
996 GHashTableIter iter;
997 remote_fencing_op_t *other = NULL;
998
999 time_t now = time(NULL);
1000
1001 g_hash_table_iter_init(&iter, stonith_remote_op_list);
1002 while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
1003 const char *other_action = op_requested_action(other);
1004
1005 if (!strcmp(op->id, other->id)) {
1006 continue;
1007 }
1008 if (other->state > st_exec) {
1009 crm_trace("%.8s not duplicate of %.8s: not in progress",
1010 op->id, other->id);
1011 continue;
1012 }
1013 if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
1014 crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
1015 op->id, other->id, op->target, other->target);
1016 continue;
1017 }
1018 if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
1019 crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
1020 op->id, other->id, op->action, other_action);
1021 continue;
1022 }
1023 if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
1024 crm_trace("%.8s not duplicate of %.8s: same client %s",
1025 op->id, other->id, op->client_name);
1026 continue;
1027 }
1028 if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
1029 crm_trace("%.8s not duplicate of %.8s: suicide for %s",
1030 op->id, other->id, other->target);
1031 continue;
1032 }
1033 if (!fencing_peer_active(crm_get_peer(0, other->originator))) {
1034 crm_notice("Failing action '%s' targeting %s originating from "
1035 "client %s@%s: Originator is dead " CRM_XS " id=%.8s",
1036 other->action, other->target, other->client_name,
1037 other->originator, other->id);
1038 crm_trace("%.8s not duplicate of %.8s: originator dead",
1039 op->id, other->id);
1040 other->state = st_failed;
1041 continue;
1042 }
1043 if ((other->total_timeout > 0)
1044 && (now > (other->total_timeout + other->created))) {
1045 crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)",
1046 op->id, other->id, now, other->created,
1047 other->total_timeout);
1048 continue;
1049 }
1050
1051
1052
1053
1054 other->duplicates = g_list_append(other->duplicates, op);
1055 if (other->total_timeout == 0) {
1056 other->total_timeout = op->total_timeout =
1057 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
1058 crm_trace("Best guess as to timeout used for %.8s: %d",
1059 other->id, other->total_timeout);
1060 }
1061 crm_notice("Merging fencing action '%s' targeting %s originating from "
1062 "client %s with identical request from %s@%s "
1063 CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
1064 op->action, op->target, op->client_name,
1065 other->client_name, other->originator,
1066 op->id, other->id, other->total_timeout);
1067 report_timeout_period(op, other->total_timeout);
1068 op->state = st_duplicate;
1069 }
1070 }
1071
1072 static uint32_t fencing_active_peers(void)
1073 {
1074 uint32_t count = 0;
1075 crm_node_t *entry;
1076 GHashTableIter gIter;
1077
1078 g_hash_table_iter_init(&gIter, crm_peer_cache);
1079 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1080 if(fencing_peer_active(entry)) {
1081 count++;
1082 }
1083 }
1084 return count;
1085 }
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096 int
1097 fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
1098 {
1099 remote_fencing_op_t *op = NULL;
1100 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
1101
1102 CRM_CHECK(dev != NULL, return EPROTO);
1103
1104 crm_notice("Received manual confirmation that %s has been fenced",
1105 pcmk__s(crm_element_value(dev, F_STONITH_TARGET),
1106 "unknown target"));
1107 op = initiate_remote_stonith_op(client, msg, TRUE);
1108 if (op == NULL) {
1109 return EPROTO;
1110 }
1111 op->state = st_done;
1112 set_fencing_completed(op);
1113 op->delegate = strdup("a human");
1114
1115
1116 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1117 finalize_op(op, msg, false);
1118
1119
1120
1121
1122 return EINPROGRESS;
1123 }
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136 void *
1137 create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
1138 {
1139 remote_fencing_op_t *op = NULL;
1140 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER);
1141 int call_options = 0;
1142 const char *operation = NULL;
1143
1144 init_stonith_remote_op_hash_table(&stonith_remote_op_list);
1145
1146
1147
1148 if (peer && dev) {
1149 const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1150
1151 CRM_CHECK(op_id != NULL, return NULL);
1152
1153 op = g_hash_table_lookup(stonith_remote_op_list, op_id);
1154 if (op) {
1155 crm_debug("Reusing existing remote fencing op %.8s for %s",
1156 op_id, ((client == NULL)? "unknown client" : client));
1157 return op;
1158 }
1159 }
1160
1161 op = calloc(1, sizeof(remote_fencing_op_t));
1162 CRM_ASSERT(op != NULL);
1163
1164 crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1165
1166 crm_element_value_int(request, F_STONITH_DELAY, &(op->client_delay));
1167
1168 if (peer && dev) {
1169 op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
1170 } else {
1171 op->id = crm_generate_uuid();
1172 }
1173
1174 g_hash_table_replace(stonith_remote_op_list, op->id, op);
1175
1176 op->state = st_query;
1177 op->replies_expected = fencing_active_peers();
1178 op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
1179 op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
1180 op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE);
1181 op->created = time(NULL);
1182
1183 if (op->originator == NULL) {
1184
1185 op->originator = strdup(stonith_our_uname);
1186 }
1187
1188 CRM_LOG_ASSERT(client != NULL);
1189 if (client) {
1190 op->client_id = strdup(client);
1191 }
1192
1193
1194
1195 operation = crm_element_value(request, F_STONITH_OPERATION);
1196
1197 if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1198 op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
1199 (unsigned long) getpid());
1200 } else {
1201 op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
1202 }
1203
1204 op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
1205 op->request = copy_xml(request);
1206 crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
1207 op->call_options = call_options;
1208
1209 crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
1210
1211 crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
1212 "base timeout %d, %u %s expected)",
1213 (peer && dev)? "Recorded" : "Generated", op->id, op->action,
1214 op->target, op->client_name, op->base_timeout,
1215 op->replies_expected,
1216 pcmk__plural_alt(op->replies_expected, "reply", "replies"));
1217
1218 if (op->call_options & st_opt_cs_nodeid) {
1219 int nodeid;
1220 crm_node_t *node;
1221
1222 pcmk__scan_min_int(op->target, &nodeid, 0);
1223 node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
1224
1225
1226 stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
1227
1228 if (node && node->uname) {
1229 free(op->target);
1230 op->target = strdup(node->uname);
1231
1232 } else {
1233 crm_warn("Could not expand nodeid '%s' into a host name", op->target);
1234 }
1235 }
1236
1237
1238 merge_duplicates(op);
1239
1240 if (op->state != st_duplicate) {
1241
1242 fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
1243 }
1244
1245
1246 stonith_fence_history_trim();
1247
1248 return op;
1249 }
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261 remote_fencing_op_t *
1262 initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
1263 gboolean manual_ack)
1264 {
1265 int query_timeout = 0;
1266 xmlNode *query = NULL;
1267 const char *client_id = NULL;
1268 remote_fencing_op_t *op = NULL;
1269 const char *relay_op_id = NULL;
1270 const char *operation = NULL;
1271
1272 if (client) {
1273 client_id = client->id;
1274 } else {
1275 client_id = crm_element_value(request, F_STONITH_CLIENTID);
1276 }
1277
1278 CRM_LOG_ASSERT(client_id != NULL);
1279 op = create_remote_stonith_op(client_id, request, FALSE);
1280 op->owner = TRUE;
1281 if (manual_ack) {
1282 return op;
1283 }
1284
1285 CRM_CHECK(op->action, return NULL);
1286
1287 if (advance_topology_level(op, true) != pcmk_rc_ok) {
1288 op->state = st_failed;
1289 }
1290
1291 switch (op->state) {
1292 case st_failed:
1293
1294 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
1295 "All topology levels failed");
1296 crm_warn("Could not request peer fencing (%s) targeting %s "
1297 CRM_XS " id=%.8s", op->action, op->target, op->id);
1298 finalize_op(op, NULL, false);
1299 return op;
1300
1301 case st_duplicate:
1302 crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
1303 CRM_XS " id=%.8s", op->action, op->target, op->id);
1304 return op;
1305
1306 default:
1307 crm_notice("Requesting peer fencing (%s) targeting %s "
1308 CRM_XS " id=%.8s state=%s base_timeout=%d",
1309 op->action, op->target, op->id,
1310 stonith_op_state_str(op->state), op->base_timeout);
1311 }
1312
1313 query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
1314 NULL, op->call_options);
1315
1316 crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
1317 crm_xml_add(query, F_STONITH_TARGET, op->target);
1318 crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
1319 crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
1320 crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
1321 crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
1322 crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
1323
1324
1325 operation = crm_element_value(request, F_STONITH_OPERATION);
1326 if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1327 relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
1328 if (relay_op_id) {
1329 crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
1330 }
1331 }
1332
1333 send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
1334 free_xml(query);
1335
1336 query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
1337 op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
1338
1339 return op;
1340 }
1341
/* Bit flags controlling which peers find_best_peer() will consider */
enum find_best_peer_options {
    /* Do not consider the peer whose host name matches the fencing target */
    FIND_PEER_SKIP_TARGET = 0x0001,
    /* Consider only the peer whose host name matches the fencing target */
    FIND_PEER_TARGET_ONLY = 0x0002,
    /* Restrict the device lookup on each peer to verified devices */
    FIND_PEER_VERIFIED_ONLY = 0x0004,
};
1350
1351 static peer_device_info_t *
1352 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
1353 {
1354 GList *iter = NULL;
1355 gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1356
1357 if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
1358 return NULL;
1359 }
1360
1361 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1362 peer_device_info_t *peer = iter->data;
1363
1364 crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
1365 peer->host, op->target, peer->ndevices,
1366 pcmk__plural_s(peer->ndevices), peer->tried, options);
1367 if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1368 continue;
1369 }
1370 if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1371 continue;
1372 }
1373
1374 if (pcmk_is_set(op->call_options, st_opt_topology)) {
1375
1376 if (grab_peer_device(op, peer, device, verified_devices_only)) {
1377 return peer;
1378 }
1379
1380 } else if (!peer->tried
1381 && count_peer_devices(op, peer, verified_devices_only,
1382 fenced_support_flag(op->action))) {
1383
1384 crm_trace("Simple fencing");
1385 return peer;
1386 }
1387 }
1388
1389 return NULL;
1390 }
1391
1392 static peer_device_info_t *
1393 stonith_choose_peer(remote_fencing_op_t * op)
1394 {
1395 const char *device = NULL;
1396 peer_device_info_t *peer = NULL;
1397 uint32_t active = fencing_active_peers();
1398
1399 do {
1400 if (op->devices) {
1401 device = op->devices->data;
1402 crm_trace("Checking for someone to fence (%s) %s using %s",
1403 op->action, op->target, device);
1404 } else {
1405 crm_trace("Checking for someone to fence (%s) %s",
1406 op->action, op->target);
1407 }
1408
1409
1410 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1411 if (peer) {
1412 crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1413 return peer;
1414 }
1415
1416 if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1417 crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1418 return NULL;
1419 }
1420
1421
1422 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1423 if (peer) {
1424 crm_trace("Found best unverified peer %s", peer->host);
1425 return peer;
1426 }
1427
1428
1429
1430
1431 if (op->phase != st_phase_on) {
1432 peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1433 if (peer) {
1434 crm_trace("%s will fence itself", peer->host);
1435 return peer;
1436 }
1437 }
1438
1439
1440
1441
1442 } while ((op->phase != st_phase_on)
1443 && pcmk_is_set(op->call_options, st_opt_topology)
1444 && (advance_topology_level(op, false) == pcmk_rc_ok));
1445
1446 if ((stonith_watchdog_timeout_ms > 0)
1447 && pcmk__is_fencing_action(op->action)
1448 && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
1449 && node_does_watchdog_fencing(op->target)) {
1450 crm_info("Couldn't contact watchdog-fencing target-node (%s)",
1451 op->target);
1452
1453 } else {
1454 crm_notice("Couldn't find anyone to fence (%s) %s using %s",
1455 op->action, op->target, (device? device : "any device"));
1456 }
1457 return NULL;
1458 }
1459
1460 static int
1461 get_device_timeout(const remote_fencing_op_t *op,
1462 const peer_device_info_t *peer, const char *device,
1463 bool with_delay)
1464 {
1465 device_properties_t *props;
1466 int delay = 0;
1467
1468 if (!peer || !device) {
1469 return op->base_timeout;
1470 }
1471
1472 props = g_hash_table_lookup(peer->devices, device);
1473 if (!props) {
1474 return op->base_timeout;
1475 }
1476
1477
1478 if (with_delay && (op->client_delay >= 0)) {
1479
1480 delay = (props->delay_max[op->phase] > 0 ?
1481 props->delay_max[op->phase] : props->delay_base[op->phase]);
1482 }
1483
1484 return (props->custom_action_timeout[op->phase]?
1485 props->custom_action_timeout[op->phase] : op->base_timeout)
1486 + delay;
1487 }
1488
/* User data passed to add_device_timeout() while iterating a peer's devices */
struct timeout_data {
    const remote_fencing_op_t *op;      /* operation whose phase is consulted */
    const peer_device_info_t *peer;     /* peer whose devices are being summed */
    int total_timeout;                  /* running sum of device timeouts */
};
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503 static void
1504 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
1505 {
1506 const char *device_id = key;
1507 device_properties_t *props = value;
1508 struct timeout_data *timeout = user_data;
1509
1510 if (!props->executed[timeout->op->phase]
1511 && !props->disallowed[timeout->op->phase]) {
1512 timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer,
1513 device_id, true);
1514 }
1515 }
1516
1517 static int
1518 get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
1519 {
1520 struct timeout_data timeout;
1521
1522 timeout.op = op;
1523 timeout.peer = peer;
1524 timeout.total_timeout = 0;
1525
1526 g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1527
1528 return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1529 }
1530
1531 static int
1532 get_op_total_timeout(const remote_fencing_op_t *op,
1533 const peer_device_info_t *chosen_peer)
1534 {
1535 int total_timeout = 0;
1536 stonith_topology_t *tp = find_topology_for_host(op->target);
1537
1538 if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
1539 int i;
1540 GList *device_list = NULL;
1541 GList *iter = NULL;
1542 GList *auto_list = NULL;
1543
1544 if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)
1545 && (op->automatic_list != NULL)) {
1546 auto_list = g_list_copy(op->automatic_list);
1547 }
1548
1549
1550
1551
1552
1553
1554
1555
1556 for (i = 0; i < ST_LEVEL_MAX; i++) {
1557 if (!tp->levels[i]) {
1558 continue;
1559 }
1560 for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1561
1562
1563
1564 if ((stonith_watchdog_timeout_ms > 0)
1565 && pcmk__is_fencing_action(op->action)
1566 && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
1567 pcmk__str_none)
1568 && node_does_watchdog_fencing(op->target)) {
1569 total_timeout += stonith_watchdog_timeout_ms / 1000;
1570 continue;
1571 }
1572
1573 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1574 const peer_device_info_t *peer = iter->data;
1575
1576 if (auto_list) {
1577 GList *match = g_list_find_custom(auto_list, device_list->data,
1578 sort_strings);
1579 if (match) {
1580 auto_list = g_list_remove(auto_list, match->data);
1581 }
1582 }
1583
1584 if (find_peer_device(op, peer, device_list->data,
1585 fenced_support_flag(op->action))) {
1586 total_timeout += get_device_timeout(op, peer,
1587 device_list->data,
1588 true);
1589 break;
1590 }
1591 }
1592 }
1593 }
1594
1595
1596 if (auto_list) {
1597 for (iter = auto_list; iter != NULL; iter = iter->next) {
1598 GList *iter2 = NULL;
1599
1600 for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) {
1601 peer_device_info_t *peer = iter2->data;
1602 if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
1603 total_timeout += get_device_timeout(op, peer,
1604 iter->data, true);
1605 break;
1606 }
1607 }
1608 }
1609 }
1610
1611 g_list_free(auto_list);
1612
1613 } else if (chosen_peer) {
1614 total_timeout = get_peer_timeout(op, chosen_peer);
1615 } else {
1616 total_timeout = op->base_timeout;
1617 }
1618
1619
1620
1621
1622 return ((total_timeout ? total_timeout : op->base_timeout)
1623 + ((op->client_delay > 0)? op->client_delay : 0));
1624 }
1625
1626 static void
1627 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
1628 {
1629 GList *iter = NULL;
1630 xmlNode *update = NULL;
1631 const char *client_node = NULL;
1632 const char *client_id = NULL;
1633 const char *call_id = NULL;
1634
1635 if (op->call_options & st_opt_sync_call) {
1636
1637
1638
1639
1640 return;
1641 } else if (!op->request) {
1642 return;
1643 }
1644
1645 crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
1646 client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
1647 call_id = crm_element_value(op->request, F_STONITH_CALLID);
1648 client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
1649 if (!client_node || !call_id || !client_id) {
1650 return;
1651 }
1652
1653 if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
1654
1655 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1656 return;
1657 }
1658
1659
1660 update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1661 crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
1662 crm_xml_add(update, F_STONITH_CLIENTID, client_id);
1663 crm_xml_add(update, F_STONITH_CALLID, call_id);
1664 crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
1665
1666 send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
1667
1668 free_xml(update);
1669
1670 for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1671 remote_fencing_op_t *dup = iter->data;
1672
1673 crm_trace("Reporting timeout for duplicate %.8s to client %s",
1674 dup->id, dup->client_name);
1675 report_timeout_period(iter->data, op_timeout);
1676 }
1677 }
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687 static void
1688 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
1689 xmlNode *msg)
1690 {
1691
1692 if (op->devices) {
1693 op->devices = op->devices->next;
1694 }
1695
1696
1697 if ((op->phase == st_phase_requested)
1698 && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) {
1699
1700 remove_required_device(op, device);
1701
1702
1703
1704
1705 if (op->devices == NULL) {
1706 op->devices = op->automatic_list;
1707 }
1708 }
1709
1710 if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1711
1712
1713
1714
1715 op_phase_on(op);
1716 }
1717
1718
1719 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1720
1721 if (op->devices) {
1722
1723 crm_trace("Next targeting %s on behalf of %s@%s",
1724 op->target, op->client_name, op->originator);
1725
1726
1727 if (op->client_delay > 0) {
1728 op->client_delay = 0;
1729 }
1730
1731 request_peer_fencing(op, NULL);
1732 } else {
1733
1734 crm_trace("Marking complex fencing op targeting %s as complete",
1735 op->target);
1736 op->state = st_done;
1737 finalize_op(op, msg, false);
1738 }
1739 }
1740
1741 static gboolean
1742 check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
1743 {
1744 if (node_does_watchdog_fencing(op->target)) {
1745
1746 crm_notice("Waiting %lds for %s to self-fence (%s) for "
1747 "client %s " CRM_XS " id=%.8s",
1748 (stonith_watchdog_timeout_ms / 1000),
1749 op->target, op->action, op->client_name, op->id);
1750
1751 if (op->op_timer_one) {
1752 g_source_remove(op->op_timer_one);
1753 }
1754 op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
1755 remote_op_watchdog_done, op);
1756 return TRUE;
1757 } else {
1758 crm_debug("Skipping fallback to watchdog-fencing as %s is "
1759 "not in host-list", op->target);
1760 }
1761 return FALSE;
1762 }
1763
1764
1765
1766
1767
1768
1769
1770
1771
/*!
 * \internal
 * \brief Ask a peer to execute the next step of a fencing operation
 *
 * \param[in,out] op    Fencing operation to execute
 * \param[in,out] peer  Peer to use; if NULL, or if topology is in use with
 *                      devices remaining, stonith_choose_peer() picks one
 *
 * \note If no peer is available, this may (depending on the operation's
 *       phase, ownership, query timer and reply counts) skip the device,
 *       finalize the operation, start waiting for watchdog self-fencing, or
 *       simply log and keep waiting for more query replies.
 */
static void
request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
{
    const char *device = NULL;
    int timeout;

    CRM_CHECK(op != NULL, return);

    crm_trace("Action %.8s targeting %s for %s is %s",
              op->id, op->target, op->client_name,
              stonith_op_state_str(op->state));

    if ((op->phase == st_phase_on) && (op->devices != NULL)) {
        /* In the "on" phase, skip devices that should not or cannot turn the
         * target back on, and move straight to the next device.
         */
        device = op->devices->data;
        if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF,
                         pcmk__str_none)) {
            crm_info("Not turning %s back on using %s because the device is "
                     "configured to stay off (pcmk_reboot_action='off')",
                     op->target, device);
            advance_topology_device_in_level(op, device, NULL);
            return;
        }
        if (!fenced_device_supports_on(device)) {
            crm_info("Not turning %s back on using %s because the agent "
                     "doesn't support 'on'", op->target, device);
            advance_topology_device_in_level(op, device, NULL);
            return;
        }
    }

    timeout = op->base_timeout;
    // Without topology, a missing peer is chosen here
    if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
        peer = stonith_choose_peer(op);
    }

    // The overall timer is armed only once per operation
    if (!op->op_timer_total) {
        op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
        op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
        report_timeout_period(op, op->total_timeout);
        crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
                 CRM_XS "id=%.8s",
                 op->total_timeout, op->target, op->client_name, op->id);
    }

    if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
        /* With topology, the caller's peer is ignored and a peer is chosen
         * for the current device. Note that this happens after the total
         * timeout above was computed using the caller-supplied peer.
         */
        peer = stonith_choose_peer(op);

        device = op->devices->data;
        // Timeout sent to the peer excludes any device delay (with_delay=false)
        timeout = get_device_timeout(op, peer, device, false);
    }

    if (peer) {
        int timeout_one = 0;
        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);

        if (op->client_delay > 0) {
            // Account for the client-requested delay in the per-peer timer
            timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay;
        }

        crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
        crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
        crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
        crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
        crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
        crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
        crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
        crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
        crm_xml_add_int(remote_op, F_STONITH_DELAY, op->client_delay);

        if (device) {
            // Single device: the per-peer timer covers that device (with delay)
            timeout_one += TIMEOUT_MULTIPLY_FACTOR *
                           get_device_timeout(op, peer, device, true);
            crm_notice("Requesting that %s perform '%s' action targeting %s "
                       "using %s " CRM_XS " for client %s (%ds)",
                       peer->host, op->action, op->target, device,
                       op->client_name, timeout_one);
            crm_xml_add(remote_op, F_STONITH_DEVICE, device);

        } else {
            // No specific device: the per-peer timer covers all peer devices
            timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
            crm_notice("Requesting that %s perform '%s' action targeting %s "
                       CRM_XS " for client %s (%ds, %lds)",
                       peer->host, op->action, op->target, op->client_name,
                       timeout_one, stonith_watchdog_timeout_ms);
        }

        op->state = st_exec;
        // Cancel any previous per-peer timer before arming a new one
        if (op->op_timer_one) {
            g_source_remove(op->op_timer_one);
            op->op_timer_one = 0;
        }

        /* If watchdog fencing is enabled and either the watchdog device is
         * being used or the target is asked to fence itself,
         * check_watchdog_fencing_and_wait() may arm op_timer_one itself.
         * Only when it does not is the regular per-peer timer armed here.
         */
        if (!((stonith_watchdog_timeout_ms > 0)
              && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
                  || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
                      && pcmk__is_fencing_action(op->action)))
              && check_watchdog_fencing_and_wait(op))) {

            op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
        }

        // The request is sent regardless of which timer was armed
        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
        peer->tried = TRUE;
        free_xml(remote_op);
        return;

    } else if (op->phase == st_phase_on) {
        /* No peer can run the "on" step; as the log message says, the "off"
         * already succeeded, so skip this device and continue.
         * NOTE(review): device may still be NULL here if op->devices was
         * NULL, and is passed to a %s format -- confirm this is acceptable.
         */
        crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
                 "after successful 'off'", device, op->target);
        advance_topology_device_in_level(op, device, NULL);
        return;

    } else if (op->owner == FALSE) {
        crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
                op->action, op->target, op->client_name);

    } else if (op->query_timer == 0) {
        // The query timer is no longer set and no peer is left to try
        crm_info("No remaining peers capable of fencing (%s) %s for client %s "
                 CRM_XS " state=%s", op->action, op->target, op->client_name,
                 stonith_op_state_str(op->state));
        CRM_CHECK(op->state < st_done, return);
        finalize_timed_out_op(op, "All nodes failed, or are unable, to "
                                  "fence target");

    } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
        /* Every expected (or every active) peer has replied, yet no peer is
         * usable.
         */

        // Fall back to waiting for watchdog self-fencing where applicable
        if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
           STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
            if (check_watchdog_fencing_and_wait(op)) {
                return;
            }
        }

        if (op->state == st_query) {
            crm_info("No peers (out of %d) have devices capable of fencing "
                     "(%s) %s for client %s " CRM_XS " state=%s",
                     op->replies, op->action, op->target, op->client_name,
                     stonith_op_state_str(op->state));

            pcmk__reset_result(&op->result);
            pcmk__set_result(&op->result, CRM_EX_ERROR,
                             PCMK_EXEC_NO_FENCE_DEVICE, NULL);
        } else {
            if (pcmk_is_set(op->call_options, st_opt_topology)) {
                pcmk__reset_result(&op->result);
                pcmk__set_result(&op->result, CRM_EX_ERROR,
                                 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
            }
            // Without topology, the existing op->result is left untouched

            crm_info("No peers (out of %d) are capable of fencing (%s) %s "
                     "for client %s " CRM_XS " state=%s",
                     op->replies, op->action, op->target, op->client_name,
                     stonith_op_state_str(op->state));
        }

        op->state = st_failed;
        finalize_op(op, NULL, false);

    } else {
        // Replies are still outstanding; nothing to do but wait
        crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
                 "for client %s " CRM_XS " id=%.8s",
                 op->action, op->target, (device? " using " : ""),
                 (device? device : ""), op->client_name, op->id);
    }
}
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008 static gint
2009 sort_peers(gconstpointer a, gconstpointer b)
2010 {
2011 const peer_device_info_t *peer_a = a;
2012 const peer_device_info_t *peer_b = b;
2013
2014 return (peer_b->ndevices - peer_a->ndevices);
2015 }
2016
2017
2018
2019
2020
2021
2022
2023 static gboolean
2024 all_topology_devices_found(const remote_fencing_op_t *op)
2025 {
2026 GList *device = NULL;
2027 GList *iter = NULL;
2028 device_properties_t *match = NULL;
2029 stonith_topology_t *tp = NULL;
2030 gboolean skip_target = FALSE;
2031 int i;
2032
2033 tp = find_topology_for_host(op->target);
2034 if (!tp) {
2035 return FALSE;
2036 }
2037 if (pcmk__is_fencing_action(op->action)) {
2038
2039
2040 skip_target = TRUE;
2041 }
2042
2043 for (i = 0; i < ST_LEVEL_MAX; i++) {
2044 for (device = tp->levels[i]; device; device = device->next) {
2045 match = NULL;
2046 for (iter = op->query_results; iter && !match; iter = iter->next) {
2047 peer_device_info_t *peer = iter->data;
2048
2049 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
2050 continue;
2051 }
2052 match = find_peer_device(op, peer, device->data, st_device_supports_none);
2053 }
2054 if (!match) {
2055 return FALSE;
2056 }
2057 }
2058 }
2059
2060 return TRUE;
2061 }
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075 static void
2076 parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
2077 const char *action, remote_fencing_op_t *op,
2078 enum st_remap_phase phase, device_properties_t *props)
2079 {
2080 props->custom_action_timeout[phase] = 0;
2081 crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
2082 &props->custom_action_timeout[phase]);
2083 if (props->custom_action_timeout[phase]) {
2084 crm_trace("Peer %s with device %s returned %s action timeout %d",
2085 peer, device, action, props->custom_action_timeout[phase]);
2086 }
2087
2088 props->delay_max[phase] = 0;
2089 crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
2090 if (props->delay_max[phase]) {
2091 crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
2092 peer, device, props->delay_max[phase], action);
2093 }
2094
2095 props->delay_base[phase] = 0;
2096 crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
2097 if (props->delay_base[phase]) {
2098 crm_trace("Peer %s with device %s returned base delay %d for %s",
2099 peer, device, props->delay_base[phase], action);
2100 }
2101
2102
2103 if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
2104 int required = 0;
2105
2106 crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
2107 if (required) {
2108 crm_trace("Peer %s requires device %s to execute for action %s",
2109 peer, device, action);
2110 add_required_device(op, device);
2111 }
2112 }
2113
2114
2115
2116
2117 if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) {
2118 props->disallowed[phase] = TRUE;
2119 crm_trace("Peer %s is disallowed from executing %s for device %s",
2120 peer, action, device);
2121 }
2122 }
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133 static void
2134 add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
2135 peer_device_info_t *peer, const char *device)
2136 {
2137 xmlNode *child;
2138 int verified = 0;
2139 device_properties_t *props = calloc(1, sizeof(device_properties_t));
2140 int flags = st_device_supports_on;
2141
2142
2143 CRM_ASSERT(props != NULL);
2144 g_hash_table_insert(peer->devices, strdup(device), props);
2145
2146
2147 crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
2148 if (verified) {
2149 crm_trace("Peer %s has confirmed a verified device %s",
2150 peer->host, device);
2151 props->verified = TRUE;
2152 }
2153
2154 crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags);
2155 props->device_support_flags = flags;
2156
2157
2158 parse_action_specific(xml, peer->host, device, op_requested_action(op),
2159 op, st_phase_requested, props);
2160 for (child = pcmk__xml_first_child(xml); child != NULL;
2161 child = pcmk__xml_next(child)) {
2162
2163
2164
2165
2166 if (pcmk__str_eq(ID(child), PCMK_ACTION_OFF, pcmk__str_none)) {
2167 parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
2168 op, st_phase_off, props);
2169 } else if (pcmk__str_eq(ID(child), PCMK_ACTION_ON, pcmk__str_none)) {
2170 parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
2171 op, st_phase_on, props);
2172 }
2173 }
2174 }
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187 static peer_device_info_t *
2188 add_result(remote_fencing_op_t *op, const char *host, int ndevices,
2189 const xmlNode *xml)
2190 {
2191 peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
2192 xmlNode *child;
2193
2194
2195
2196 CRM_CHECK(peer != NULL, return NULL);
2197 peer->host = strdup(host);
2198 peer->devices = pcmk__strkey_table(free, free);
2199
2200
2201 for (child = pcmk__xml_first_child(xml); child != NULL;
2202 child = pcmk__xml_next(child)) {
2203 const char *device = ID(child);
2204
2205 if (device) {
2206 add_device_properties(child, op, peer, device);
2207 }
2208 }
2209
2210 peer->ndevices = g_hash_table_size(peer->devices);
2211 CRM_CHECK(ndevices == peer->ndevices,
2212 crm_err("Query claimed to have %d device%s but %d found",
2213 ndevices, pcmk__plural_s(ndevices), peer->ndevices));
2214
2215 op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
2216 return peer;
2217 }
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
/*!
 * \internal
 * \brief Process a peer's reply to a fencing query
 *
 * Record the peer's devices (if it reported any) for the operation named in
 * the reply, then decide whether fencing can be requested now or whether
 * more replies should be awaited.
 *
 * \param[in] msg  Query reply XML
 *
 * \return pcmk_ok once the reply has been processed, -EPROTO if the reply
 *         lacks a required attribute, or -EOPNOTSUPP if the operation it
 *         refers to is not in stonith_remote_op_list
 */
int
process_remote_stonith_query(xmlNode *msg)
{
    int ndevices = 0;
    gboolean host_is_target = FALSE;
    gboolean have_all_replies = FALSE;
    const char *id = NULL;
    const char *host = NULL;
    remote_fencing_op_t *op = NULL;
    peer_device_info_t *peer = NULL;
    uint32_t replies_expected;
    xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
    CRM_CHECK(id != NULL, return -EPROTO);

    // From here on, dev refers to the element holding the device count
    dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);
    crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);

    op = g_hash_table_lookup(stonith_remote_op_list, id);
    if (op == NULL) {
        crm_debug("Received query reply for unknown or expired operation %s",
                  id);
        return -EOPNOTSUPP;
    }

    // Expect the smaller of the currently active peers and op->replies_expected
    replies_expected = fencing_active_peers();
    if (op->replies_expected < replies_expected) {
        replies_expected = op->replies_expected;
    }
    // Note: the reply counter is incremented as part of this condition
    if ((++op->replies >= replies_expected) && (op->state == st_query)) {
        have_all_replies = TRUE;
    }
    host = crm_element_value(msg, F_ORIG);
    host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);

    crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
             op->replies, replies_expected, host,
             op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
    // Peers reporting no devices are not added to the results (peer stays NULL)
    if (ndevices > 0) {
        peer = add_result(op, host, ndevices, dev);
    }

    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    if (pcmk_is_set(op->call_options, st_opt_topology)) {
        /* With topology, start as soon as every topology device has been
         * found, or otherwise once all replies are in.
         */
        if (op->state == st_query && all_topology_devices_found(op)) {

            crm_trace("All topology devices found");
            request_peer_fencing(op, peer);

        } else if (have_all_replies) {
            crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                     replies_expected, op->replies);
            request_peer_fencing(op, NULL);
        }

    } else if (op->state == st_query) {
        int nverified = count_peer_devices(op, peer, TRUE,
                                           fenced_support_flag(op->action));

        /* Without topology, start right away if this peer is not the target
         * and has at least one verified device.
         */
        if ((peer != NULL) && !host_is_target && nverified) {

            crm_trace("Found %d verified device%s",
                      nverified, pcmk__plural_s(nverified));
            request_peer_fencing(op, peer);

        } else if (have_all_replies) {
            crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                     replies_expected, op->replies);
            request_peer_fencing(op, NULL);

        } else {
            crm_trace("Waiting for more peer results before launching fencing operation");
        }

    } else if ((peer != NULL) && (op->state == st_done)) {
        // The operation is already done; the result was stored but is unused
        crm_info("Discarding query result from %s (%d device%s): "
                 "Operation is %s", peer->host,
                 peer->ndevices, pcmk__plural_s(peer->ndevices),
                 stonith_op_state_str(op->state));
    }

    return pcmk_ok;
}
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
/*!
 * \internal
 * \brief Process a peer's reply to a fencing request
 *
 * Store the reported result in the matching operation, then finalize the
 * operation, advance to the next device or topology level, or retry with
 * another peer, depending on the result and the operation's settings.
 *
 * \param[in] msg  Fencing reply XML
 */
void
fenced_process_fencing_reply(xmlNode *msg)
{
    const char *id = NULL;
    const char *device = NULL;
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;

    CRM_CHECK(dev != NULL, return);

    id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
    CRM_CHECK(id != NULL, return);

    // From here on, dev refers to the element carrying the result
    dev = stonith__find_xe_with_result(msg);
    CRM_CHECK(dev != NULL, return);

    stonith__xe_get_result(dev, &result);

    device = crm_element_value(dev, F_STONITH_DEVICE);

    if (stonith_remote_op_list) {
        op = g_hash_table_lookup(stonith_remote_op_list, id);
    }

    if ((op == NULL) && pcmk__result_ok(&result)) {
        // Unknown operation that succeeded: create a record for it
        const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);

        op = create_remote_stonith_op(client_id, dev, TRUE);
    }

    if (op == NULL) {
        // Unknown operation that did not succeed: nothing to record
        crm_info("Received peer result of unknown or expired operation %s", id);
        pcmk__reset_result(&result);
        return;
    }

    /* Replace the operation's result with the received one (struct copy);
     * the local copy is not reset on any path after this point.
     */
    pcmk__reset_result(&op->result);
    op->result = result;

    if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
        // The reply is for a device other than the operation's current one
        crm_err("Received outdated reply for device %s (instead of %s) to "
                "fence (%s) %s. Operation already timed out at peer level.",
                device, (const char *) op->devices->data, op->action, op->target);
        return;
    }

    if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
        // A broadcast carries a final result: just record it and finalize
        if (pcmk__result_ok(&op->result)) {
            op->state = st_done;
        } else {
            op->state = st_failed;
        }
        finalize_op(op, msg, false);
        return;

    } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
        // Non-broadcast replies are only expected by the originating node
        crm_err("Received non-broadcast fencing result for operation %.8s "
                "we do not own (device %s targeting %s)",
                op->id, device, op->target);
        return;
    }

    if (pcmk_is_set(op->call_options, st_opt_topology)) {
        /* Note: this shadows the outer `device`, and is read from msg itself
         * rather than from the element that carried the result.
         */
        const char *device = NULL;
        const char *reason = op->result.exit_reason;

        // Already complete: only the finalization remains
        if (op->state == st_done) {
            finalize_op(op, msg, false);
            return;
        }

        device = crm_element_value(msg, F_STONITH_DEVICE);

        /* NOTE(review): the literal 2 is presumably st_phase_on, judging by
         * the log message below -- confirm against the enum definition.
         */
        if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
            // The failure is ignored and the result overwritten with success
            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
                     "after successful 'off'",
                     device, pcmk_exec_status_str(op->result.execution_status),
                     (reason == NULL)? "" : ": ",
                     (reason == NULL)? "" : reason,
                     op->target);
            pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
        } else {
            crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
                       "%s%s%s%s",
                       op->action, op->target,
                       ((device == NULL)? "" : " using "),
                       ((device == NULL)? "" : device),
                       op->client_name,
                       op->originator,
                       pcmk_exec_status_str(op->result.execution_status),
                       (reason == NULL)? "" : " (",
                       (reason == NULL)? "" : reason,
                       (reason == NULL)? "" : ")");
        }

        if (pcmk__result_ok(&op->result)) {
            // Success: continue with the next device, or finish
            advance_topology_device_in_level(op, device, msg);
            return;
        } else {
            // Failure: try the next topology level; fail if there is none
            if (advance_topology_level(op, false) != pcmk_rc_ok) {
                op->state = st_failed;
                finalize_op(op, msg, false);
                return;
            }
        }

    } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
        // No topology, success, no devices pending: done
        op->state = st_done;
        finalize_op(op, msg, false);
        return;

    } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
               && (op->devices == NULL)) {
        // No topology and the attempt timed out: fail without retrying
        op->state = st_failed;
        finalize_op(op, msg, false);
        return;

    } else {
        // Any other case falls through to the retry below
    }

    // Try again (with the next topology level's devices, or another peer)
    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
              op->target, op->originator, op->client_name,
              pcmk_exec_status_str(op->result.execution_status));
    request_peer_fencing(op, NULL);
}
2479
2480 gboolean
2481 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
2482 {
2483 GHashTableIter iter;
2484 time_t now = time(NULL);
2485 remote_fencing_op_t *rop = NULL;
2486
2487 if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2488 action == NULL) {
2489 return FALSE;
2490 }
2491
2492 g_hash_table_iter_init(&iter, stonith_remote_op_list);
2493 while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2494 if (strcmp(rop->target, target) != 0) {
2495 continue;
2496 } else if (rop->state != st_done) {
2497 continue;
2498
2499
2500
2501 } else if (strcmp(rop->action, action) != 0) {
2502 continue;
2503 } else if ((rop->completed + tolerance) < now) {
2504 continue;
2505 }
2506
2507 crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2508 target, action, tolerance, rop->delegate, rop->originator);
2509 return TRUE;
2510 }
2511 return FALSE;
2512 }