This source file includes the following definitions.
- sort_strings
- free_remote_query
- free_stonith_remote_op_list
- count_peer_device
- count_peer_devices
- find_peer_device
- grab_peer_device
- clear_remote_op_timers
- free_remote_op
- init_stonith_remote_op_hash_table
- op_requested_action
- op_phase_off
- op_phase_on
- undo_op_remap
- fencing_result2xml
- fenced_broadcast_op_result
- handle_local_reply_and_notify
- finalize_op_duplicates
- delegate_from_xml
- finalize_op
- remote_op_watchdog_done
- remote_op_timeout_one
- finalize_timed_out_op
- remote_op_timeout
- remote_op_query_timeout
- topology_is_empty
- add_required_device
- remove_required_device
- set_op_device_list
- topology_matches
- find_topology_for_host
- advance_topology_level
- merge_duplicates
- fencing_active_peers
- fenced_handle_manual_confirmation
- create_remote_stonith_op
- initiate_remote_stonith_op
- find_best_peer
- stonith_choose_peer
- get_device_timeout
- add_device_timeout
- get_peer_timeout
- get_op_total_timeout
- report_timeout_period
- advance_topology_device_in_level
- check_watchdog_fencing_and_wait
- request_peer_fencing
- sort_peers
- all_topology_devices_found
- parse_action_specific
- add_device_properties
- add_result
- process_remote_stonith_query
- fenced_process_fencing_reply
- stonith_check_fence_tolerance
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdio.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <sys/utsname.h>
19
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <ctype.h>
24 #include <regex.h>
25
26 #include <crm/crm.h>
27 #include <crm/msg_xml.h>
28 #include <crm/common/ipc.h>
29 #include <crm/common/ipc_internal.h>
30 #include <crm/cluster/internal.h>
31
32 #include <crm/stonith-ng.h>
33 #include <crm/fencing/internal.h>
34 #include <crm/common/xml.h>
35 #include <crm/common/xml_internal.h>
36
37 #include <crm/common/util.h>
38 #include <pacemaker-fenced.h>
39
/* Multiplier applied to timeouts in this file: the query timer set up in
 * initiate_remote_stonith_op() and the best-guess total timeout computed in
 * merge_duplicates() (presumably to leave some headroom -- TODO confirm)
 */
#define TIMEOUT_MULTIPLY_FACTOR 1.2
41
42
43
44
45
46
47
/* Properties of a single fencing device as tracked for one peer. The array
 * members are indexed by the operation's phase (op->phase).
 */
typedef struct device_properties_s {

    /* Consulted when only "verified" devices are acceptable (see
     * count_peer_device() and grab_peer_device())
     */
    gboolean verified;

    /* Set to TRUE by grab_peer_device() once the device has been claimed for
     * the given phase; find_peer_device() then no longer returns it
     */
    gboolean executed[st_phase_max];

    /* When set for a phase, find_peer_device() refuses to return the device */
    gboolean disallowed[st_phase_max];

    /* NOTE(review): presumably a per-action timeout override -- not used in
     * the visible part of this file, confirm meaning and units
     */
    int custom_action_timeout[st_phase_max];

    /* NOTE(review): presumably the maximum delay for the action -- confirm */
    int delay_max[st_phase_max];

    /* NOTE(review): presumably the base delay for the action -- confirm */
    int delay_base[st_phase_max];

    /* Bit flags tested with pcmk_is_set() against the support flag required
     * for an action (see find_peer_device() and count_peer_device())
     */
    uint32_t device_support_flags;
} device_properties_t;
67
/* One entry of an operation's query_results list; released by
 * free_remote_query()
 */
typedef struct {

    /* Name of the peer (heap-allocated; freed in free_remote_query()) */
    char *host;

    /* NOTE(review): presumably whether this peer was already asked to fence --
     * not used in the visible part of this file, confirm
     */
    gboolean tried;

    /* NOTE(review): presumably the number of devices reported -- confirm */
    int ndevices;

    /* Table looked up by device name in find_peer_device(); values are
     * device_properties_t
     */
    GHashTable *devices;
} peer_device_info_t;
78
/* Table of remote fencing operations keyed by operation ID (see
 * create_remote_stonith_op()); entries are released with free_remote_op()
 * when the table is created via init_stonith_remote_op_hash_table()
 */
GHashTable *stonith_remote_op_list = NULL;

/* Defined outside this file; used here to build the peer query request */
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                  int call_options);

/* Forward declarations of functions that are used before their definition */
static void request_peer_fencing(remote_fencing_op_t *op,
                                 peer_device_info_t *peer);
static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
                                const peer_device_info_t *chosen_peer);
90
91 static gint
92 sort_strings(gconstpointer a, gconstpointer b)
93 {
94 return strcmp(a, b);
95 }
96
97 static void
98 free_remote_query(gpointer data)
99 {
100 if (data != NULL) {
101 peer_device_info_t *peer = data;
102
103 g_hash_table_destroy(peer->devices);
104 free(peer->host);
105 free(peer);
106 }
107 }
108
109 void
110 free_stonith_remote_op_list(void)
111 {
112 if (stonith_remote_op_list != NULL) {
113 g_hash_table_destroy(stonith_remote_op_list);
114 stonith_remote_op_list = NULL;
115 }
116 }
117
/* State handed to count_peer_device() while iterating a peer's device table */
struct peer_count_data {
    /* Operation whose current phase selects the "executed" slot to check */
    const remote_fencing_op_t *op;
    /* If set, only devices with the "verified" property are counted */
    gboolean verified_only;
    /* Required support flag(s), or st_device_supports_none for no filter */
    uint32_t support_action_only;
    /* Running total of matching devices */
    int count;
};
124
125
126
127
128
129
130
131
132
133 static void
134 count_peer_device(gpointer key, gpointer value, gpointer user_data)
135 {
136 device_properties_t *props = (device_properties_t*)value;
137 struct peer_count_data *data = user_data;
138
139 if (!props->executed[data->op->phase]
140 && (!data->verified_only || props->verified)
141 && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
142 ++(data->count);
143 }
144 }
145
146
147
148
149
150
151
152
153
154
155
156
157 static int
158 count_peer_devices(const remote_fencing_op_t *op,
159 const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
160 {
161 struct peer_count_data data;
162
163 data.op = op;
164 data.verified_only = verified_only;
165 data.support_action_only = support_on_action_only;
166 data.count = 0;
167 if (peer) {
168 g_hash_table_foreach(peer->devices, count_peer_device, &data);
169 }
170 return data.count;
171 }
172
173
174
175
176
177
178
179
180
181
182
183 static device_properties_t *
184 find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
185 const char *device, uint32_t support_action_only)
186 {
187 device_properties_t *props = g_hash_table_lookup(peer->devices, device);
188
189 if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
190 return NULL;
191 }
192 return (props && !props->executed[op->phase]
193 && !props->disallowed[op->phase])? props : NULL;
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207 static gboolean
208 grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
209 const char *device, gboolean verified_devices_only)
210 {
211 device_properties_t *props = find_peer_device(op, peer, device,
212 fenced_support_flag(op->action));
213
214 if ((props == NULL) || (verified_devices_only && !props->verified)) {
215 return FALSE;
216 }
217
218 crm_trace("Removing %s from %s (%d remaining)",
219 device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
220 props->executed[op->phase] = TRUE;
221 return TRUE;
222 }
223
224 static void
225 clear_remote_op_timers(remote_fencing_op_t * op)
226 {
227 if (op->query_timer) {
228 g_source_remove(op->query_timer);
229 op->query_timer = 0;
230 }
231 if (op->op_timer_total) {
232 g_source_remove(op->op_timer_total);
233 op->op_timer_total = 0;
234 }
235 if (op->op_timer_one) {
236 g_source_remove(op->op_timer_one);
237 op->op_timer_one = 0;
238 }
239 }
240
241 static void
242 free_remote_op(gpointer data)
243 {
244 remote_fencing_op_t *op = data;
245
246 crm_log_xml_debug(op->request, "Destroying");
247
248 clear_remote_op_timers(op);
249
250 free(op->id);
251 free(op->action);
252 free(op->delegate);
253 free(op->target);
254 free(op->client_id);
255 free(op->client_name);
256 free(op->originator);
257
258 if (op->query_results) {
259 g_list_free_full(op->query_results, free_remote_query);
260 }
261 if (op->request) {
262 free_xml(op->request);
263 op->request = NULL;
264 }
265 if (op->devices_list) {
266 g_list_free_full(op->devices_list, free);
267 op->devices_list = NULL;
268 }
269 g_list_free_full(op->automatic_list, free);
270 g_list_free(op->duplicates);
271
272 pcmk__reset_result(&op->result);
273 free(op);
274 }
275
276 void
277 init_stonith_remote_op_hash_table(GHashTable **table)
278 {
279 if (*table == NULL) {
280 *table = pcmk__strkey_table(NULL, free_remote_op);
281 }
282 }
283
284
285
286
287
288
289
290
291
292 static const char *
293 op_requested_action(const remote_fencing_op_t *op)
294 {
295 return ((op->phase > st_phase_requested)? "reboot" : op->action);
296 }
297
298
299
300
301
302
303
304 static void
305 op_phase_off(remote_fencing_op_t *op)
306 {
307 crm_info("Remapping multiple-device reboot targeting %s to 'off' "
308 CRM_XS " id=%.8s", op->target, op->id);
309 op->phase = st_phase_off;
310
311
312
313
314 strcpy(op->action, "off");
315 }
316
317
318
319
320
321
322
323 static void
324 op_phase_on(remote_fencing_op_t *op)
325 {
326 GList *iter = NULL;
327
328 crm_info("Remapped 'off' targeting %s complete, "
329 "remapping to 'on' for %s " CRM_XS " id=%.8s",
330 op->target, op->client_name, op->id);
331 op->phase = st_phase_on;
332 strcpy(op->action, "on");
333
334
335
336
337 for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
338 GList *match = g_list_find_custom(op->devices_list, iter->data,
339 sort_strings);
340
341 if (match) {
342 op->devices_list = g_list_remove(op->devices_list, match->data);
343 }
344 }
345 g_list_free_full(op->automatic_list, free);
346 op->automatic_list = NULL;
347
348
349 op->devices = op->devices_list;
350 }
351
352
353
354
355
356
357
358 static void
359 undo_op_remap(remote_fencing_op_t *op)
360 {
361 if (op->phase > 0) {
362 crm_info("Undoing remap of reboot targeting %s for %s "
363 CRM_XS " id=%.8s", op->target, op->client_name, op->id);
364 op->phase = st_phase_requested;
365 strcpy(op->action, "reboot");
366 }
367 }
368
369
370
371
372
373
374
375
376
377
378 static xmlNode *
379 fencing_result2xml(const remote_fencing_op_t *op)
380 {
381 xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
382
383 crm_xml_add_int(notify_data, "state", op->state);
384 crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
385 crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
386 crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
387 crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
388 crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
389 crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
390 crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
391
392 return notify_data;
393 }
394
395
396
397
398
399
400
401
402 void
403 fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
404 {
405 static int count = 0;
406 xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
407 xmlNode *notify_data = fencing_result2xml(op);
408
409 count++;
410 crm_trace("Broadcasting result to peers");
411 crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
412 crm_xml_add(bcast, F_SUBTYPE, "broadcast");
413 crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
414 crm_xml_add_int(bcast, "count", count);
415
416 if (op_merged) {
417 pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true);
418 }
419
420 stonith__xe_set_result(notify_data, &op->result);
421
422 add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
423 send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
424 free_xml(notify_data);
425 free_xml(bcast);
426
427 return;
428 }
429
430
431
432
433
434
435
436
437 static void
438 handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
439 {
440 xmlNode *notify_data = NULL;
441 xmlNode *reply = NULL;
442 pcmk__client_t *client = NULL;
443
444 if (op->notify_sent == TRUE) {
445
446 return;
447 }
448
449
450 crm_xml_add_int(data, "state", op->state);
451 crm_xml_add(data, F_STONITH_TARGET, op->target);
452 crm_xml_add(data, F_STONITH_OPERATION, op->action);
453
454 reply = fenced_construct_reply(op->request, data, &op->result);
455 crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
456
457
458 client = pcmk__find_client_by_id(op->client_id);
459 if (client == NULL) {
460 crm_trace("Skipping reply to %s: no longer a client", op->client_id);
461 } else {
462 do_local_reply(reply, client, op->call_options);
463 }
464
465
466 notify_data = fencing_result2xml(op);
467 fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
468 free_xml(notify_data);
469 fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
470
471
472 op->notify_sent = TRUE;
473 free_xml(reply);
474 }
475
476
477
478
479
480
481
482
483 static void
484 finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
485 {
486 for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
487 remote_fencing_op_t *other = iter->data;
488
489 if (other->state == st_duplicate) {
490 other->state = op->state;
491 crm_debug("Performing duplicate notification for %s@%s: %s "
492 CRM_XS " id=%.8s",
493 other->client_name, other->originator,
494 pcmk_exec_status_str(op->result.execution_status),
495 other->id);
496 pcmk__copy_result(&op->result, &other->result);
497 finalize_op(other, data, true);
498
499 } else {
500
501 crm_err("Skipping duplicate notification for %s@%s "
502 CRM_XS " state=%s id=%.8s",
503 other->client_name, other->originator,
504 stonith_op_state_str(other->state), other->id);
505 }
506 }
507 }
508
509 static char *
510 delegate_from_xml(xmlNode *xml)
511 {
512 xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER);
513
514 if (match == NULL) {
515 return crm_element_value_copy(xml, F_ORIG);
516 } else {
517 return crm_element_value_copy(match, F_STONITH_DELEGATE);
518 }
519 }
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537 static void
538 finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
539 {
540 int level = LOG_ERR;
541 const char *subt = NULL;
542 xmlNode *local_data = NULL;
543 gboolean op_merged = FALSE;
544
545 CRM_CHECK((op != NULL), return);
546
547
548 clear_remote_op_timers(op);
549
550 if (op->notify_sent) {
551
552 crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
553 "Result arrived too late " CRM_XS " id=%.8s",
554 op->action, (op->target? " targeting " : ""),
555 (op->target? op->target : ""),
556 (op->delegate? op->delegate : "unknown node"),
557 op->client_name, op->originator,
558 (op_merged? " (merged)" : ""),
559 op->id);
560 return;
561 }
562
563 set_fencing_completed(op);
564 undo_op_remap(op);
565
566 if (data == NULL) {
567 data = create_xml_node(NULL, "remote-op");
568 local_data = data;
569
570 } else if (op->delegate == NULL) {
571 switch (op->result.execution_status) {
572 case PCMK_EXEC_NO_FENCE_DEVICE:
573 break;
574
575 case PCMK_EXEC_INVALID:
576 if (op->result.exit_status != CRM_EX_EXPIRED) {
577 op->delegate = delegate_from_xml(data);
578 }
579 break;
580
581 default:
582 op->delegate = delegate_from_xml(data);
583 break;
584 }
585 }
586
587 if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
588 op_merged = true;
589 }
590
591
592
593
594 subt = crm_element_value(data, F_SUBTYPE);
595 if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
596
597 fenced_broadcast_op_result(op, op_merged);
598 free_xml(local_data);
599 return;
600 }
601
602 if (pcmk__result_ok(&op->result) || dup
603 || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
604 level = LOG_NOTICE;
605 }
606 do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
607 CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
608 (op->target? op->target : ""),
609 (op->delegate? op->delegate : "unknown node"),
610 op->client_name, op->originator,
611 (op_merged? " (merged)" : ""),
612 crm_exit_str(op->result.exit_status),
613 pcmk_exec_status_str(op->result.execution_status),
614 ((op->result.exit_reason == NULL)? "" : ": "),
615 ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
616 op->id);
617
618 handle_local_reply_and_notify(op, data);
619
620 if (!dup) {
621 finalize_op_duplicates(op, data);
622 }
623
624
625
626
627 if (op->query_results) {
628 g_list_free_full(op->query_results, free_remote_query);
629 op->query_results = NULL;
630 }
631 if (op->request) {
632 free_xml(op->request);
633 op->request = NULL;
634 }
635
636 free_xml(local_data);
637 }
638
639
640
641
642
643
644
645
646
647 static gboolean
648 remote_op_watchdog_done(gpointer userdata)
649 {
650 remote_fencing_op_t *op = userdata;
651
652 op->op_timer_one = 0;
653
654 crm_notice("Self-fencing (%s) by %s for %s assumed complete "
655 CRM_XS " id=%.8s",
656 op->action, op->target, op->client_name, op->id);
657 op->state = st_done;
658 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
659 finalize_op(op, NULL, false);
660 return G_SOURCE_REMOVE;
661 }
662
663 static gboolean
664 remote_op_timeout_one(gpointer userdata)
665 {
666 remote_fencing_op_t *op = userdata;
667
668 op->op_timer_one = 0;
669
670 crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
671 " id=%.8s", op->action, op->target, op->client_name, op->id);
672 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
673 "Peer did not return fence result within timeout");
674
675
676 if (op->delay > 0) {
677 op->delay = 0;
678 crm_trace("Try another device for '%s' action targeting %s "
679 "for client %s without delay " CRM_XS " id=%.8s",
680 op->action, op->target, op->client_name, op->id);
681 }
682
683
684 request_peer_fencing(op, NULL);
685 return G_SOURCE_REMOVE;
686 }
687
688
689
690
691
692
693
694
695 static void
696 finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
697 {
698 crm_debug("Action '%s' targeting %s for client %s timed out "
699 CRM_XS " id=%.8s",
700 op->action, op->target, op->client_name, op->id);
701
702 if (op->phase == st_phase_on) {
703
704
705
706
707 op->state = st_done;
708 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
709 } else {
710 op->state = st_failed;
711 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
712 }
713 finalize_op(op, NULL, false);
714 }
715
716
717
718
719
720
721
722
723
724 static gboolean
725 remote_op_timeout(gpointer userdata)
726 {
727 remote_fencing_op_t *op = userdata;
728
729 op->op_timer_total = 0;
730
731 if (op->state == st_done) {
732 crm_debug("Action '%s' targeting %s for client %s already completed "
733 CRM_XS " id=%.8s",
734 op->action, op->target, op->client_name, op->id);
735 } else {
736 finalize_timed_out_op(userdata, "Fencing did not complete within a "
737 "total timeout based on the "
738 "configured timeout and retries for "
739 "any devices attempted");
740 }
741 return G_SOURCE_REMOVE;
742 }
743
744 static gboolean
745 remote_op_query_timeout(gpointer data)
746 {
747 remote_fencing_op_t *op = data;
748
749 op->query_timer = 0;
750
751 if (op->state == st_done) {
752 crm_debug("Operation %.8s targeting %s already completed",
753 op->id, op->target);
754 } else if (op->state == st_exec) {
755 crm_debug("Operation %.8s targeting %s already in progress",
756 op->id, op->target);
757 } else if (op->query_results) {
758
759 crm_debug("Query %.8s targeting %s complete (state=%s)",
760 op->id, op->target, stonith_op_state_str(op->state));
761 request_peer_fencing(op, NULL);
762 } else {
763 crm_debug("Query %.8s targeting %s timed out (state=%s)",
764 op->id, op->target, stonith_op_state_str(op->state));
765 finalize_timed_out_op(op, "No capable peers replied to device query "
766 "within timeout");
767 }
768
769 return G_SOURCE_REMOVE;
770 }
771
772 static gboolean
773 topology_is_empty(stonith_topology_t *tp)
774 {
775 int i;
776
777 if (tp == NULL) {
778 return TRUE;
779 }
780
781 for (i = 0; i < ST_LEVEL_MAX; i++) {
782 if (tp->levels[i] != NULL) {
783 return FALSE;
784 }
785 }
786 return TRUE;
787 }
788
789
790
791
792
793
794
795
796 static void
797 add_required_device(remote_fencing_op_t *op, const char *device)
798 {
799 GList *match = g_list_find_custom(op->automatic_list, device,
800 sort_strings);
801
802 if (!match) {
803 op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
804 }
805 }
806
807
808
809
810
811
812
813
814 static void
815 remove_required_device(remote_fencing_op_t *op, const char *device)
816 {
817 GList *match = g_list_find_custom(op->automatic_list, device,
818 sort_strings);
819
820 if (match) {
821 op->automatic_list = g_list_remove(op->automatic_list, match->data);
822 }
823 }
824
825
826 static void
827 set_op_device_list(remote_fencing_op_t * op, GList *devices)
828 {
829 GList *lpc = NULL;
830
831 if (op->devices_list) {
832 g_list_free_full(op->devices_list, free);
833 op->devices_list = NULL;
834 }
835 for (lpc = devices; lpc != NULL; lpc = lpc->next) {
836 op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
837 }
838 op->devices = op->devices_list;
839 }
840
841
842
843
844
845
846
847
848
849
850 static gboolean
851 topology_matches(const stonith_topology_t *tp, const char *node)
852 {
853 regex_t r_patt;
854
855 CRM_CHECK(node && tp && tp->target, return FALSE);
856 switch (tp->kind) {
857 case fenced_target_by_attribute:
858
859
860
861
862
863
864 if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
865 crm_notice("Matched %s with %s by attribute", node, tp->target);
866 return TRUE;
867 }
868 break;
869
870 case fenced_target_by_pattern:
871
872
873
874 if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
875 crm_info("Bad regex '%s' for fencing level", tp->target);
876 } else {
877 int status = regexec(&r_patt, node, 0, NULL, 0);
878
879 regfree(&r_patt);
880 if (status == 0) {
881 crm_notice("Matched %s with %s by name", node, tp->target);
882 return TRUE;
883 }
884 }
885 break;
886
887 case fenced_target_by_name:
888 crm_trace("Testing %s against %s", node, tp->target);
889 return pcmk__str_eq(tp->target, node, pcmk__str_casei);
890
891 default:
892 break;
893 }
894 crm_trace("No match for %s with %s", node, tp->target);
895 return FALSE;
896 }
897
898 stonith_topology_t *
899 find_topology_for_host(const char *host)
900 {
901 GHashTableIter tIter;
902 stonith_topology_t *tp = g_hash_table_lookup(topology, host);
903
904 if(tp != NULL) {
905 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
906 return tp;
907 }
908
909 g_hash_table_iter_init(&tIter, topology);
910 while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
911 if (topology_matches(tp, host)) {
912 crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
913 return tp;
914 }
915 }
916
917 crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
918 return NULL;
919 }
920
921
922
923
924
925
926
927
928
929
930
931
932 static int
933 advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
934 {
935 stonith_topology_t *tp = NULL;
936
937 if (op->target) {
938 tp = find_topology_for_host(op->target);
939 }
940 if (topology_is_empty(tp)) {
941 return empty_ok? pcmk_rc_ok : ENODEV;
942 }
943
944 CRM_ASSERT(tp->levels != NULL);
945
946 stonith__set_call_options(op->call_options, op->id, st_opt_topology);
947
948
949 undo_op_remap(op);
950
951 do {
952 op->level++;
953
954 } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
955
956 if (op->level < ST_LEVEL_MAX) {
957 crm_trace("Attempting fencing level %d targeting %s (%d devices) "
958 "for client %s@%s (id=%.8s)",
959 op->level, op->target, g_list_length(tp->levels[op->level]),
960 op->client_name, op->originator, op->id);
961 set_op_device_list(op, tp->levels[op->level]);
962
963
964 if (op->level > 1 && op->delay > 0) {
965 op->delay = 0;
966 }
967
968 if ((g_list_next(op->devices_list) != NULL)
969 && pcmk__str_eq(op->action, "reboot", pcmk__str_none)) {
970
971
972
973
974
975 op_phase_off(op);
976 }
977 return pcmk_rc_ok;
978 }
979
980 crm_info("All %sfencing options targeting %s for client %s@%s failed "
981 CRM_XS " id=%.8s",
982 (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
983 op->target, op->client_name, op->originator, op->id);
984 return ENODEV;
985 }
986
987
988
989
990
991
992
993 static void
994 merge_duplicates(remote_fencing_op_t *op)
995 {
996 GHashTableIter iter;
997 remote_fencing_op_t *other = NULL;
998
999 time_t now = time(NULL);
1000
1001 g_hash_table_iter_init(&iter, stonith_remote_op_list);
1002 while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
1003 const char *other_action = op_requested_action(other);
1004
1005 if (!strcmp(op->id, other->id)) {
1006 continue;
1007 }
1008 if (other->state > st_exec) {
1009 crm_trace("%.8s not duplicate of %.8s: not in progress",
1010 op->id, other->id);
1011 continue;
1012 }
1013 if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
1014 crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
1015 op->id, other->id, op->target, other->target);
1016 continue;
1017 }
1018 if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
1019 crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
1020 op->id, other->id, op->action, other_action);
1021 continue;
1022 }
1023 if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
1024 crm_trace("%.8s not duplicate of %.8s: same client %s",
1025 op->id, other->id, op->client_name);
1026 continue;
1027 }
1028 if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
1029 crm_trace("%.8s not duplicate of %.8s: suicide for %s",
1030 op->id, other->id, other->target);
1031 continue;
1032 }
1033 if (!fencing_peer_active(crm_get_peer(0, other->originator))) {
1034 crm_notice("Failing action '%s' targeting %s originating from "
1035 "client %s@%s: Originator is dead " CRM_XS " id=%.8s",
1036 other->action, other->target, other->client_name,
1037 other->originator, other->id);
1038 crm_trace("%.8s not duplicate of %.8s: originator dead",
1039 op->id, other->id);
1040 other->state = st_failed;
1041 continue;
1042 }
1043 if ((other->total_timeout > 0)
1044 && (now > (other->total_timeout + other->created))) {
1045 crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)",
1046 op->id, other->id, now, other->created,
1047 other->total_timeout);
1048 continue;
1049 }
1050
1051
1052
1053
1054 other->duplicates = g_list_append(other->duplicates, op);
1055 if (other->total_timeout == 0) {
1056 other->total_timeout = op->total_timeout =
1057 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
1058 crm_trace("Best guess as to timeout used for %.8s: %d",
1059 other->id, other->total_timeout);
1060 }
1061 crm_notice("Merging fencing action '%s' targeting %s originating from "
1062 "client %s with identical request from %s@%s "
1063 CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
1064 op->action, op->target, op->client_name,
1065 other->client_name, other->originator,
1066 op->id, other->id, other->total_timeout);
1067 report_timeout_period(op, other->total_timeout);
1068 op->state = st_duplicate;
1069 }
1070 }
1071
1072 static uint32_t fencing_active_peers(void)
1073 {
1074 uint32_t count = 0;
1075 crm_node_t *entry;
1076 GHashTableIter gIter;
1077
1078 g_hash_table_iter_init(&gIter, crm_peer_cache);
1079 while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1080 if(fencing_peer_active(entry)) {
1081 count++;
1082 }
1083 }
1084 return count;
1085 }
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096 int
1097 fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
1098 {
1099 remote_fencing_op_t *op = NULL;
1100 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
1101
1102 CRM_CHECK(dev != NULL, return EPROTO);
1103
1104 crm_notice("Received manual confirmation that %s has been fenced",
1105 pcmk__s(crm_element_value(dev, F_STONITH_TARGET),
1106 "unknown target"));
1107 op = initiate_remote_stonith_op(client, msg, TRUE);
1108 if (op == NULL) {
1109 return EPROTO;
1110 }
1111 op->state = st_done;
1112 set_fencing_completed(op);
1113 op->delegate = strdup("a human");
1114
1115
1116 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1117 finalize_op(op, msg, false);
1118
1119
1120
1121
1122 return EINPROGRESS;
1123 }
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136 void *
1137 create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
1138 {
1139 remote_fencing_op_t *op = NULL;
1140 xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER);
1141 int call_options = 0;
1142 const char *operation = NULL;
1143
1144 init_stonith_remote_op_hash_table(&stonith_remote_op_list);
1145
1146
1147
1148 if (peer && dev) {
1149 const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
1150
1151 CRM_CHECK(op_id != NULL, return NULL);
1152
1153 op = g_hash_table_lookup(stonith_remote_op_list, op_id);
1154 if (op) {
1155 crm_debug("Reusing existing remote fencing op %.8s for %s",
1156 op_id, ((client == NULL)? "unknown client" : client));
1157 return op;
1158 }
1159 }
1160
1161 op = calloc(1, sizeof(remote_fencing_op_t));
1162 CRM_ASSERT(op != NULL);
1163
1164 crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1165
1166 crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
1167
1168 if (peer && dev) {
1169 op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
1170 } else {
1171 op->id = crm_generate_uuid();
1172 }
1173
1174 g_hash_table_replace(stonith_remote_op_list, op->id, op);
1175
1176 op->state = st_query;
1177 op->replies_expected = fencing_active_peers();
1178 op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
1179 op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
1180 op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE);
1181 op->created = time(NULL);
1182
1183 if (op->originator == NULL) {
1184
1185 op->originator = strdup(stonith_our_uname);
1186 }
1187
1188 CRM_LOG_ASSERT(client != NULL);
1189 if (client) {
1190 op->client_id = strdup(client);
1191 }
1192
1193
1194
1195 operation = crm_element_value(request, F_STONITH_OPERATION);
1196
1197 if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1198 op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
1199 (unsigned long) getpid());
1200 } else {
1201 op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
1202 }
1203
1204 op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
1205 op->request = copy_xml(request);
1206 crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
1207 op->call_options = call_options;
1208
1209 crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));
1210
1211 crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
1212 "base timeout %d, %u %s expected)",
1213 (peer && dev)? "Recorded" : "Generated", op->id, op->action,
1214 op->target, op->client_name, op->base_timeout,
1215 op->replies_expected,
1216 pcmk__plural_alt(op->replies_expected, "reply", "replies"));
1217
1218 if (op->call_options & st_opt_cs_nodeid) {
1219 int nodeid;
1220 crm_node_t *node;
1221
1222 pcmk__scan_min_int(op->target, &nodeid, 0);
1223 node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
1224
1225
1226 stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
1227
1228 if (node && node->uname) {
1229 free(op->target);
1230 op->target = strdup(node->uname);
1231
1232 } else {
1233 crm_warn("Could not expand nodeid '%s' into a host name", op->target);
1234 }
1235 }
1236
1237
1238 merge_duplicates(op);
1239
1240 if (op->state != st_duplicate) {
1241
1242 fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
1243 }
1244
1245
1246 stonith_fence_history_trim();
1247
1248 return op;
1249 }
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261 remote_fencing_op_t *
1262 initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
1263 gboolean manual_ack)
1264 {
1265 int query_timeout = 0;
1266 xmlNode *query = NULL;
1267 const char *client_id = NULL;
1268 remote_fencing_op_t *op = NULL;
1269 const char *relay_op_id = NULL;
1270 const char *operation = NULL;
1271
1272 if (client) {
1273 client_id = client->id;
1274 } else {
1275 client_id = crm_element_value(request, F_STONITH_CLIENTID);
1276 }
1277
1278 CRM_LOG_ASSERT(client_id != NULL);
1279 op = create_remote_stonith_op(client_id, request, FALSE);
1280 op->owner = TRUE;
1281 if (manual_ack) {
1282 return op;
1283 }
1284
1285 CRM_CHECK(op->action, return NULL);
1286
1287 if (advance_topology_level(op, true) != pcmk_rc_ok) {
1288 op->state = st_failed;
1289 }
1290
1291 switch (op->state) {
1292 case st_failed:
1293
1294 pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
1295 "All topology levels failed");
1296 crm_warn("Could not request peer fencing (%s) targeting %s "
1297 CRM_XS " id=%.8s", op->action, op->target, op->id);
1298 finalize_op(op, NULL, false);
1299 return op;
1300
1301 case st_duplicate:
1302 crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
1303 CRM_XS " id=%.8s", op->action, op->target, op->id);
1304 return op;
1305
1306 default:
1307 crm_notice("Requesting peer fencing (%s) targeting %s "
1308 CRM_XS " id=%.8s state=%s base_timeout=%d",
1309 op->action, op->target, op->id,
1310 stonith_op_state_str(op->state), op->base_timeout);
1311 }
1312
1313 query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
1314 NULL, op->call_options);
1315
1316 crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
1317 crm_xml_add(query, F_STONITH_TARGET, op->target);
1318 crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
1319 crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
1320 crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
1321 crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
1322 crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);
1323
1324
1325 operation = crm_element_value(request, F_STONITH_OPERATION);
1326 if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1327 relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
1328 if (relay_op_id) {
1329 crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
1330 }
1331 }
1332
1333 send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
1334 free_xml(query);
1335
1336 query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
1337 op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
1338
1339 return op;
1340 }
1341
/* Bit flags accepted by find_best_peer() */
enum find_best_peer_options {
    /* Ignore query results that came from the fencing target itself */
    FIND_PEER_SKIP_TARGET   = (1 << 0),

    /* Consider only query results that came from the fencing target */
    FIND_PEER_TARGET_ONLY   = (1 << 1),

    /* Consider only devices that the peer reported as verified */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
1350
1351 static peer_device_info_t *
1352 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
1353 {
1354 GList *iter = NULL;
1355 gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1356
1357 if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
1358 return NULL;
1359 }
1360
1361 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1362 peer_device_info_t *peer = iter->data;
1363
1364 crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
1365 peer->host, op->target, peer->ndevices,
1366 pcmk__plural_s(peer->ndevices), peer->tried, options);
1367 if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1368 continue;
1369 }
1370 if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1371 continue;
1372 }
1373
1374 if (pcmk_is_set(op->call_options, st_opt_topology)) {
1375
1376 if (grab_peer_device(op, peer, device, verified_devices_only)) {
1377 return peer;
1378 }
1379
1380 } else if (!peer->tried
1381 && count_peer_devices(op, peer, verified_devices_only,
1382 fenced_support_flag(op->action))) {
1383
1384 crm_trace("Simple fencing");
1385 return peer;
1386 }
1387 }
1388
1389 return NULL;
1390 }
1391
1392 static peer_device_info_t *
1393 stonith_choose_peer(remote_fencing_op_t * op)
1394 {
1395 const char *device = NULL;
1396 peer_device_info_t *peer = NULL;
1397 uint32_t active = fencing_active_peers();
1398
1399 do {
1400 if (op->devices) {
1401 device = op->devices->data;
1402 crm_trace("Checking for someone to fence (%s) %s using %s",
1403 op->action, op->target, device);
1404 } else {
1405 crm_trace("Checking for someone to fence (%s) %s",
1406 op->action, op->target);
1407 }
1408
1409
1410 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1411 if (peer) {
1412 crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1413 return peer;
1414 }
1415
1416 if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1417 crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1418 return NULL;
1419 }
1420
1421
1422 peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1423 if (peer) {
1424 crm_trace("Found best unverified peer %s", peer->host);
1425 return peer;
1426 }
1427
1428
1429
1430
1431 if (op->phase != st_phase_on) {
1432 peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1433 if (peer) {
1434 crm_trace("%s will fence itself", peer->host);
1435 return peer;
1436 }
1437 }
1438
1439
1440
1441
1442 } while ((op->phase != st_phase_on)
1443 && pcmk_is_set(op->call_options, st_opt_topology)
1444 && (advance_topology_level(op, false) == pcmk_rc_ok));
1445
1446 if ((stonith_watchdog_timeout_ms > 0)
1447 && pcmk__is_fencing_action(op->action)
1448 && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
1449 && node_does_watchdog_fencing(op->target)) {
1450 crm_info("Couldn't contact watchdog-fencing target-node (%s)",
1451 op->target);
1452
1453 } else {
1454 crm_notice("Couldn't find anyone to fence (%s) %s using %s",
1455 op->action, op->target, (device? device : "any device"));
1456 }
1457 return NULL;
1458 }
1459
1460 static int
1461 get_device_timeout(const remote_fencing_op_t *op,
1462 const peer_device_info_t *peer, const char *device,
1463 bool with_delay)
1464 {
1465 device_properties_t *props;
1466 int delay = 0;
1467
1468 if (!peer || !device) {
1469 return op->base_timeout;
1470 }
1471
1472 props = g_hash_table_lookup(peer->devices, device);
1473 if (!props) {
1474 return op->base_timeout;
1475 }
1476
1477
1478 if (with_delay && op->delay >= 0) {
1479
1480 delay = (props->delay_max[op->phase] > 0 ?
1481 props->delay_max[op->phase] : props->delay_base[op->phase]);
1482 }
1483
1484 return (props->custom_action_timeout[op->phase]?
1485 props->custom_action_timeout[op->phase] : op->base_timeout)
1486 + delay;
1487 }
1488
/* Accumulator handed to add_device_timeout() through g_hash_table_foreach() */
struct timeout_data {
    const remote_fencing_op_t *op;      // operation the timeout is computed for
    const peer_device_info_t *peer;     // peer whose devices are being summed
    int total_timeout;                  // running sum of device timeouts
};
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503 static void
1504 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
1505 {
1506 const char *device_id = key;
1507 device_properties_t *props = value;
1508 struct timeout_data *timeout = user_data;
1509
1510 if (!props->executed[timeout->op->phase]
1511 && !props->disallowed[timeout->op->phase]) {
1512 timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer,
1513 device_id, true);
1514 }
1515 }
1516
1517 static int
1518 get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
1519 {
1520 struct timeout_data timeout;
1521
1522 timeout.op = op;
1523 timeout.peer = peer;
1524 timeout.total_timeout = 0;
1525
1526 g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1527
1528 return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1529 }
1530
1531 static int
1532 get_op_total_timeout(const remote_fencing_op_t *op,
1533 const peer_device_info_t *chosen_peer)
1534 {
1535 int total_timeout = 0;
1536 stonith_topology_t *tp = find_topology_for_host(op->target);
1537
1538 if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
1539 int i;
1540 GList *device_list = NULL;
1541 GList *iter = NULL;
1542 GList *auto_list = NULL;
1543
1544 if (pcmk__str_eq(op->action, "on", pcmk__str_none)
1545 && (op->automatic_list != NULL)) {
1546 auto_list = g_list_copy(op->automatic_list);
1547 }
1548
1549
1550
1551
1552
1553
1554
1555
1556 for (i = 0; i < ST_LEVEL_MAX; i++) {
1557 if (!tp->levels[i]) {
1558 continue;
1559 }
1560 for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1561
1562
1563
1564 if ((stonith_watchdog_timeout_ms > 0)
1565 && pcmk__is_fencing_action(op->action)
1566 && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
1567 pcmk__str_none)
1568 && node_does_watchdog_fencing(op->target)) {
1569 total_timeout += stonith_watchdog_timeout_ms / 1000;
1570 continue;
1571 }
1572
1573 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1574 const peer_device_info_t *peer = iter->data;
1575
1576 if (auto_list) {
1577 GList *match = g_list_find_custom(auto_list, device_list->data,
1578 sort_strings);
1579 if (match) {
1580 auto_list = g_list_remove(auto_list, match->data);
1581 }
1582 }
1583
1584 if (find_peer_device(op, peer, device_list->data,
1585 fenced_support_flag(op->action))) {
1586 total_timeout += get_device_timeout(op, peer,
1587 device_list->data,
1588 true);
1589 break;
1590 }
1591 }
1592 }
1593 }
1594
1595
1596 if (auto_list) {
1597 for (iter = auto_list; iter != NULL; iter = iter->next) {
1598 GList *iter2 = NULL;
1599
1600 for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) {
1601 peer_device_info_t *peer = iter2->data;
1602 if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
1603 total_timeout += get_device_timeout(op, peer,
1604 iter->data, true);
1605 break;
1606 }
1607 }
1608 }
1609 }
1610
1611 g_list_free(auto_list);
1612
1613 } else if (chosen_peer) {
1614 total_timeout = get_peer_timeout(op, chosen_peer);
1615 } else {
1616 total_timeout = op->base_timeout;
1617 }
1618
1619
1620
1621
1622 return ((total_timeout ? total_timeout : op->base_timeout)
1623 + (op->delay > 0 ? op->delay : 0));
1624 }
1625
1626 static void
1627 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
1628 {
1629 GList *iter = NULL;
1630 xmlNode *update = NULL;
1631 const char *client_node = NULL;
1632 const char *client_id = NULL;
1633 const char *call_id = NULL;
1634
1635 if (op->call_options & st_opt_sync_call) {
1636
1637
1638
1639
1640 return;
1641 } else if (!op->request) {
1642 return;
1643 }
1644
1645 crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
1646 client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
1647 call_id = crm_element_value(op->request, F_STONITH_CALLID);
1648 client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
1649 if (!client_node || !call_id || !client_id) {
1650 return;
1651 }
1652
1653 if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
1654
1655 do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1656 return;
1657 }
1658
1659
1660 update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1661 crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
1662 crm_xml_add(update, F_STONITH_CLIENTID, client_id);
1663 crm_xml_add(update, F_STONITH_CALLID, call_id);
1664 crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
1665
1666 send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
1667
1668 free_xml(update);
1669
1670 for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1671 remote_fencing_op_t *dup = iter->data;
1672
1673 crm_trace("Reporting timeout for duplicate %.8s to client %s",
1674 dup->id, dup->client_name);
1675 report_timeout_period(iter->data, op_timeout);
1676 }
1677 }
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687 static void
1688 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
1689 xmlNode *msg)
1690 {
1691
1692 if (op->devices) {
1693 op->devices = op->devices->next;
1694 }
1695
1696
1697 if ((op->phase == st_phase_requested)
1698 && pcmk__str_eq(op->action, "on", pcmk__str_none)) {
1699
1700 remove_required_device(op, device);
1701
1702
1703
1704
1705 if (op->devices == NULL) {
1706 op->devices = op->automatic_list;
1707 }
1708 }
1709
1710 if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1711
1712
1713
1714
1715 op_phase_on(op);
1716 }
1717
1718
1719 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1720
1721 if (op->devices) {
1722
1723 crm_trace("Next targeting %s on behalf of %s@%s",
1724 op->target, op->client_name, op->originator);
1725
1726
1727 if (op->delay > 0) {
1728 op->delay = 0;
1729 }
1730
1731 request_peer_fencing(op, NULL);
1732 } else {
1733
1734 crm_trace("Marking complex fencing op targeting %s as complete",
1735 op->target);
1736 op->state = st_done;
1737 finalize_op(op, msg, false);
1738 }
1739 }
1740
1741 static gboolean
1742 check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
1743 {
1744 if (node_does_watchdog_fencing(op->target)) {
1745
1746 crm_notice("Waiting %lds for %s to self-fence (%s) for "
1747 "client %s " CRM_XS " id=%.8s",
1748 (stonith_watchdog_timeout_ms / 1000),
1749 op->target, op->action, op->client_name, op->id);
1750
1751 if (op->op_timer_one) {
1752 g_source_remove(op->op_timer_one);
1753 }
1754 op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
1755 remote_op_watchdog_done, op);
1756 return TRUE;
1757 } else {
1758 crm_debug("Skipping fallback to watchdog-fencing as %s is "
1759 "not in host-list", op->target);
1760 }
1761 return FALSE;
1762 }
1763
1764
1765
1766
1767
1768
1769
1770
1771
/*!
 * \internal
 * \brief Ask a peer to execute a fencing action, or handle having no peer
 *
 * \param[in,out] op    Fencing operation to be executed
 * \param[in,out] peer  Peer to ask; if NULL, one is chosen with
 *                      stonith_choose_peer() (with topology, a peer is always
 *                      chosen again for the current device)
 */
static void
request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
{
    const char *device = NULL;
    int timeout;

    CRM_CHECK(op != NULL, return);

    crm_trace("Action %.8s targeting %s for %s is %s",
              op->id, op->target, op->client_name,
              stonith_op_state_str(op->state));

    if ((op->phase == st_phase_on) && (op->devices != NULL)) {
        /* In the "on" phase, skip devices that are configured to stay off or
         * whose agent can't turn the target back on, and move on to the next
         * device instead.
         */
        device = op->devices->data;
        if (pcmk__str_eq(fenced_device_reboot_action(device), "off",
                         pcmk__str_none)) {
            crm_info("Not turning %s back on using %s because the device is "
                     "configured to stay off (pcmk_reboot_action='off')",
                     op->target, device);
            advance_topology_device_in_level(op, device, NULL);
            return;
        }
        if (!fenced_device_supports_on(device)) {
            crm_info("Not turning %s back on using %s because the agent "
                     "doesn't support 'on'", op->target, device);
            advance_topology_device_in_level(op, device, NULL);
            return;
        }
    }

    timeout = op->base_timeout;

    // Without topology, choose a peer now if the caller did not supply one
    if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
        peer = stonith_choose_peer(op);
    }

    // The total-operation timer is started only once per operation
    if (!op->op_timer_total) {
        op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
        op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
        report_timeout_period(op, op->total_timeout);
        crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
                 CRM_XS "id=%.8s",
                 op->total_timeout, op->target, op->client_name, op->id);
    }

    if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
        /* With topology, a peer is (re)chosen for the current device, which
         * replaces any peer that was passed in.
         */
        peer = stonith_choose_peer(op);

        device = op->devices->data;

        /* The timeout sent in the request excludes the device's delay (the
         * delay is included in the local timer computed below).
         */
        timeout = get_device_timeout(op, peer, device, false);
    }

    if (peer) {
        /* timeout_one (the local wait for this one request) starts with the
         * scaled requested delay, if that is positive.
         */
        int timeout_one = (op->delay > 0 ?
                           TIMEOUT_MULTIPLY_FACTOR * op->delay : 0);
        xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);

        crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
        crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
        crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
        crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
        crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
        crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
        crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
        crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
        crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);

        if (device) {
            // One specific device: add its scaled timeout, including delay
            timeout_one += TIMEOUT_MULTIPLY_FACTOR *
                           get_device_timeout(op, peer, device, true);
            crm_notice("Requesting that %s perform '%s' action targeting %s "
                       "using %s " CRM_XS " for client %s (%ds)",
                       peer->host, op->action, op->target, device,
                       op->client_name, timeout_one);
            crm_xml_add(remote_op, F_STONITH_DEVICE, device);

        } else {
            // No specific device: add the scaled timeout for the whole peer
            timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
            crm_notice("Requesting that %s perform '%s' action targeting %s "
                       CRM_XS " for client %s (%ds, %lds)",
                       peer->host, op->action, op->target, op->client_name,
                       timeout_one, stonith_watchdog_timeout_ms);
        }

        op->state = st_exec;
        if (op->op_timer_one) {
            g_source_remove(op->op_timer_one);
            op->op_timer_one = 0;
        }

        /* If watchdog fencing is enabled and either the watchdog device is
         * used or the target is asked to fence itself, then
         * check_watchdog_fencing_and_wait() may arm op_timer_one with the
         * watchdog timeout. Only when that did not happen is the regular
         * per-request timer armed here.
         */
        if (!((stonith_watchdog_timeout_ms > 0)
              && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
                  || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
                      && pcmk__is_fencing_action(op->action)))
              && check_watchdog_fencing_and_wait(op))) {

            op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
        }

        send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
        peer->tried = TRUE;
        free_xml(remote_op);
        return;

    } else if (op->phase == st_phase_on) {
        /* No peer can turn the target back on. This is only warned about, and
         * the operation moves on to the next device.
         */
        crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
                 "after successful 'off'", device, op->target);
        advance_topology_device_in_level(op, device, NULL);
        return;

    } else if (op->owner == FALSE) {
        crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
                op->action, op->target, op->client_name);

    } else if (op->query_timer == 0) {
        // No query timer is set and no capable peer is left
        crm_info("No remaining peers capable of fencing (%s) %s for client %s "
                 CRM_XS " state=%s", op->action, op->target, op->client_name,
                 stonith_op_state_str(op->state));
        CRM_CHECK(op->state < st_done, return);
        finalize_timed_out_op(op, "All nodes failed, or are unable, to "
                                  "fence target");

    } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
        /* Every expected (or every active) peer has replied. If watchdog
         * fencing is enabled and the device is the watchdog device or unset,
         * waiting for the target to self-fence is tried before failing.
         */
        if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
           STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
            if (check_watchdog_fencing_and_wait(op)) {
                return;
            }
        }

        if (op->state == st_query) {
            crm_info("No peers (out of %d) have devices capable of fencing "
                     "(%s) %s for client %s " CRM_XS " state=%s",
                     op->replies, op->action, op->target, op->client_name,
                     stonith_op_state_str(op->state));

            pcmk__reset_result(&op->result);
            pcmk__set_result(&op->result, CRM_EX_ERROR,
                             PCMK_EXEC_NO_FENCE_DEVICE, NULL);
        } else {
            // Outside the query state, only topology overrides the result
            if (pcmk_is_set(op->call_options, st_opt_topology)) {
                pcmk__reset_result(&op->result);
                pcmk__set_result(&op->result, CRM_EX_ERROR,
                                 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
            }

            crm_info("No peers (out of %d) are capable of fencing (%s) %s "
                     "for client %s " CRM_XS " state=%s",
                     op->replies, op->action, op->target, op->client_name,
                     stonith_op_state_str(op->state));
        }

        op->state = st_failed;
        finalize_op(op, NULL, false);

    } else {
        // More query replies may still arrive, so nothing is decided yet
        crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
                 "for client %s " CRM_XS " id=%.8s",
                 op->action, op->target, (device? " using " : ""),
                 (device? device : ""), op->client_name, op->id);
    }
}
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005 static gint
2006 sort_peers(gconstpointer a, gconstpointer b)
2007 {
2008 const peer_device_info_t *peer_a = a;
2009 const peer_device_info_t *peer_b = b;
2010
2011 return (peer_b->ndevices - peer_a->ndevices);
2012 }
2013
2014
2015
2016
2017
2018
2019
2020 static gboolean
2021 all_topology_devices_found(const remote_fencing_op_t *op)
2022 {
2023 GList *device = NULL;
2024 GList *iter = NULL;
2025 device_properties_t *match = NULL;
2026 stonith_topology_t *tp = NULL;
2027 gboolean skip_target = FALSE;
2028 int i;
2029
2030 tp = find_topology_for_host(op->target);
2031 if (!tp) {
2032 return FALSE;
2033 }
2034 if (pcmk__is_fencing_action(op->action)) {
2035
2036
2037 skip_target = TRUE;
2038 }
2039
2040 for (i = 0; i < ST_LEVEL_MAX; i++) {
2041 for (device = tp->levels[i]; device; device = device->next) {
2042 match = NULL;
2043 for (iter = op->query_results; iter && !match; iter = iter->next) {
2044 peer_device_info_t *peer = iter->data;
2045
2046 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
2047 continue;
2048 }
2049 match = find_peer_device(op, peer, device->data, st_device_supports_none);
2050 }
2051 if (!match) {
2052 return FALSE;
2053 }
2054 }
2055 }
2056
2057 return TRUE;
2058 }
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072 static void
2073 parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
2074 const char *action, remote_fencing_op_t *op,
2075 enum st_remap_phase phase, device_properties_t *props)
2076 {
2077 props->custom_action_timeout[phase] = 0;
2078 crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
2079 &props->custom_action_timeout[phase]);
2080 if (props->custom_action_timeout[phase]) {
2081 crm_trace("Peer %s with device %s returned %s action timeout %d",
2082 peer, device, action, props->custom_action_timeout[phase]);
2083 }
2084
2085 props->delay_max[phase] = 0;
2086 crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
2087 if (props->delay_max[phase]) {
2088 crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
2089 peer, device, props->delay_max[phase], action);
2090 }
2091
2092 props->delay_base[phase] = 0;
2093 crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
2094 if (props->delay_base[phase]) {
2095 crm_trace("Peer %s with device %s returned base delay %d for %s",
2096 peer, device, props->delay_base[phase], action);
2097 }
2098
2099
2100 if (pcmk__str_eq(action, "on", pcmk__str_none)) {
2101 int required = 0;
2102
2103 crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
2104 if (required) {
2105 crm_trace("Peer %s requires device %s to execute for action %s",
2106 peer, device, action);
2107 add_required_device(op, device);
2108 }
2109 }
2110
2111
2112
2113
2114 if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) {
2115 props->disallowed[phase] = TRUE;
2116 crm_trace("Peer %s is disallowed from executing %s for device %s",
2117 peer, action, device);
2118 }
2119 }
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130 static void
2131 add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
2132 peer_device_info_t *peer, const char *device)
2133 {
2134 xmlNode *child;
2135 int verified = 0;
2136 device_properties_t *props = calloc(1, sizeof(device_properties_t));
2137 int flags = st_device_supports_on;
2138
2139
2140 CRM_ASSERT(props != NULL);
2141 g_hash_table_insert(peer->devices, strdup(device), props);
2142
2143
2144 crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
2145 if (verified) {
2146 crm_trace("Peer %s has confirmed a verified device %s",
2147 peer->host, device);
2148 props->verified = TRUE;
2149 }
2150
2151 crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags);
2152 props->device_support_flags = flags;
2153
2154
2155 parse_action_specific(xml, peer->host, device, op_requested_action(op),
2156 op, st_phase_requested, props);
2157 for (child = pcmk__xml_first_child(xml); child != NULL;
2158 child = pcmk__xml_next(child)) {
2159
2160
2161
2162
2163 if (pcmk__str_eq(ID(child), "off", pcmk__str_none)) {
2164 parse_action_specific(child, peer->host, device, "off",
2165 op, st_phase_off, props);
2166 } else if (pcmk__str_eq(ID(child), "on", pcmk__str_none)) {
2167 parse_action_specific(child, peer->host, device, "on",
2168 op, st_phase_on, props);
2169 }
2170 }
2171 }
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184 static peer_device_info_t *
2185 add_result(remote_fencing_op_t *op, const char *host, int ndevices,
2186 const xmlNode *xml)
2187 {
2188 peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
2189 xmlNode *child;
2190
2191
2192
2193 CRM_CHECK(peer != NULL, return NULL);
2194 peer->host = strdup(host);
2195 peer->devices = pcmk__strkey_table(free, free);
2196
2197
2198 for (child = pcmk__xml_first_child(xml); child != NULL;
2199 child = pcmk__xml_next(child)) {
2200 const char *device = ID(child);
2201
2202 if (device) {
2203 add_device_properties(child, op, peer, device);
2204 }
2205 }
2206
2207 peer->ndevices = g_hash_table_size(peer->devices);
2208 CRM_CHECK(ndevices == peer->ndevices,
2209 crm_err("Query claimed to have %d device%s but %d found",
2210 ndevices, pcmk__plural_s(ndevices), peer->ndevices));
2211
2212 op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
2213 return peer;
2214 }
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
/*!
 * \internal
 * \brief Process a peer's reply to a fencing device query
 *
 * The reply is counted, its devices (if any) are stored with add_result(), and
 * fencing is requested once the available information allows it.
 *
 * \param[in] msg  Query reply XML
 *
 * \return pcmk_ok if the reply was processed, -EPROTO if required information
 *         is missing from the reply, or -EOPNOTSUPP if the operation is not in
 *         stonith_remote_op_list
 */
int
process_remote_stonith_query(xmlNode *msg)
{
    int ndevices = 0;
    gboolean host_is_target = FALSE;
    gboolean have_all_replies = FALSE;
    const char *id = NULL;
    const char *host = NULL;
    remote_fencing_op_t *op = NULL;
    peer_device_info_t *peer = NULL;
    uint32_t replies_expected;
    xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
    CRM_CHECK(id != NULL, return -EPROTO);

    // From here on, dev is the element that carries the device count
    dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);
    crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);

    op = g_hash_table_lookup(stonith_remote_op_list, id);
    if (op == NULL) {
        crm_debug("Received query reply for unknown or expired operation %s",
                  id);
        return -EOPNOTSUPP;
    }

    /* Expect replies from no more peers than are currently active, nor more
     * than the operation itself expects.
     */
    replies_expected = fencing_active_peers();
    if (op->replies_expected < replies_expected) {
        replies_expected = op->replies_expected;
    }

    // This reply is counted even if it turns out to contain no devices
    if ((++op->replies >= replies_expected) && (op->state == st_query)) {
        have_all_replies = TRUE;
    }
    host = crm_element_value(msg, F_ORIG);
    host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);

    crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
             op->replies, replies_expected, host,
             op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);

    // peer stays NULL for a reply without devices
    if (ndevices > 0) {
        peer = add_result(op, host, ndevices, dev);
    }

    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    if (pcmk_is_set(op->call_options, st_opt_topology)) {
        /* With topology, fencing starts as soon as every topology device has
         * been found, or else once all replies are in.
         */
        if (op->state == st_query && all_topology_devices_found(op)) {
            crm_trace("All topology devices found");
            request_peer_fencing(op, peer);

        } else if (have_all_replies) {
            crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                     replies_expected, op->replies);
            request_peer_fencing(op, NULL);
        }

    } else if (op->state == st_query) {
        int nverified = count_peer_devices(op, peer, TRUE,
                                           fenced_support_flag(op->action));

        /* Without topology, fencing starts right away if a peer other than
         * the target reported a verified device, or else once all replies
         * are in.
         */
        if ((peer != NULL) && !host_is_target && nverified) {
            crm_trace("Found %d verified device%s",
                      nverified, pcmk__plural_s(nverified));
            request_peer_fencing(op, peer);

        } else if (have_all_replies) {
            crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                     replies_expected, op->replies);
            request_peer_fencing(op, NULL);

        } else {
            crm_trace("Waiting for more peer results before launching fencing operation");
        }

    } else if ((peer != NULL) && (op->state == st_done)) {
        // The operation is already done, so the reply is only logged
        crm_info("Discarding query result from %s (%d device%s): "
                 "Operation is %s", peer->host,
                 peer->ndevices, pcmk__plural_s(peer->ndevices),
                 stonith_op_state_str(op->state));
    }

    return pcmk_ok;
}
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333 void
2334 fenced_process_fencing_reply(xmlNode *msg)
2335 {
2336 const char *id = NULL;
2337 const char *device = NULL;
2338 remote_fencing_op_t *op = NULL;
2339 xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
2340 pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2341
2342 CRM_CHECK(dev != NULL, return);
2343
2344 id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
2345 CRM_CHECK(id != NULL, return);
2346
2347 dev = stonith__find_xe_with_result(msg);
2348 CRM_CHECK(dev != NULL, return);
2349
2350 stonith__xe_get_result(dev, &result);
2351
2352 device = crm_element_value(dev, F_STONITH_DEVICE);
2353
2354 if (stonith_remote_op_list) {
2355 op = g_hash_table_lookup(stonith_remote_op_list, id);
2356 }
2357
2358 if ((op == NULL) && pcmk__result_ok(&result)) {
2359
2360 const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
2361
2362 op = create_remote_stonith_op(client_id, dev, TRUE);
2363 }
2364
2365 if (op == NULL) {
2366
2367
2368 crm_info("Received peer result of unknown or expired operation %s", id);
2369 pcmk__reset_result(&result);
2370 return;
2371 }
2372
2373 pcmk__reset_result(&op->result);
2374 op->result = result;
2375
2376 if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
2377 crm_err("Received outdated reply for device %s (instead of %s) to "
2378 "fence (%s) %s. Operation already timed out at peer level.",
2379 device, (const char *) op->devices->data, op->action, op->target);
2380 return;
2381 }
2382
2383 if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
2384 if (pcmk__result_ok(&op->result)) {
2385 op->state = st_done;
2386 } else {
2387 op->state = st_failed;
2388 }
2389 finalize_op(op, msg, false);
2390 return;
2391
2392 } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
2393
2394
2395 crm_err("Received non-broadcast fencing result for operation %.8s "
2396 "we do not own (device %s targeting %s)",
2397 op->id, device, op->target);
2398 return;
2399 }
2400
2401 if (pcmk_is_set(op->call_options, st_opt_topology)) {
2402 const char *device = NULL;
2403 const char *reason = op->result.exit_reason;
2404
2405
2406
2407 if (op->state == st_done) {
2408 finalize_op(op, msg, false);
2409 return;
2410 }
2411
2412 device = crm_element_value(msg, F_STONITH_DEVICE);
2413
2414 if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
2415
2416
2417
2418 crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
2419 "after successful 'off'",
2420 device, pcmk_exec_status_str(op->result.execution_status),
2421 (reason == NULL)? "" : ": ",
2422 (reason == NULL)? "" : reason,
2423 op->target);
2424 pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2425 } else {
2426 crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
2427 "%s%s%s%s",
2428 op->action, op->target,
2429 ((device == NULL)? "" : " using "),
2430 ((device == NULL)? "" : device),
2431 op->client_name,
2432 op->originator,
2433 pcmk_exec_status_str(op->result.execution_status),
2434 (reason == NULL)? "" : " (",
2435 (reason == NULL)? "" : reason,
2436 (reason == NULL)? "" : ")");
2437 }
2438
2439 if (pcmk__result_ok(&op->result)) {
2440
2441
2442 advance_topology_device_in_level(op, device, msg);
2443 return;
2444 } else {
2445
2446
2447 if (advance_topology_level(op, false) != pcmk_rc_ok) {
2448 op->state = st_failed;
2449 finalize_op(op, msg, false);
2450 return;
2451 }
2452 }
2453
2454 } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
2455 op->state = st_done;
2456 finalize_op(op, msg, false);
2457 return;
2458
2459 } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
2460 && (op->devices == NULL)) {
2461
2462 op->state = st_failed;
2463 finalize_op(op, msg, false);
2464 return;
2465
2466 } else {
2467
2468 }
2469
2470
2471 crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
2472 op->target, op->originator, op->client_name,
2473 pcmk_exec_status_str(op->result.execution_status));
2474 request_peer_fencing(op, NULL);
2475 }
2476
2477 gboolean
2478 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
2479 {
2480 GHashTableIter iter;
2481 time_t now = time(NULL);
2482 remote_fencing_op_t *rop = NULL;
2483
2484 if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2485 action == NULL) {
2486 return FALSE;
2487 }
2488
2489 g_hash_table_iter_init(&iter, stonith_remote_op_list);
2490 while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2491 if (strcmp(rop->target, target) != 0) {
2492 continue;
2493 } else if (rop->state != st_done) {
2494 continue;
2495
2496
2497
2498 } else if (strcmp(rop->action, action) != 0) {
2499 continue;
2500 } else if ((rop->completed + tolerance) < now) {
2501 continue;
2502 }
2503
2504 crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2505 target, action, tolerance, rop->delegate, rop->originator);
2506 return TRUE;
2507 }
2508 return FALSE;
2509 }