This source file includes the following definitions.
- add_stonith_cleanup
- remove_stonith_cleanup
- purge_stonith_cleanup
- execute_stonith_cleanup
- fail_incompletable_stonith
- tengine_stonith_connection_destroy
- tengine_stonith_notify
- te_connect_stonith
- stop_te_timer
- te_graph_trigger
- trigger_graph_processing
- abort_transition_graph
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 #include <crm_internal.h>
20
21 #include <sys/param.h>
22 #include <crm/crm.h>
23
24 #include <crm/msg_xml.h>
25
26 #include <crm/common/xml.h>
27 #include <tengine.h>
28 #include <crmd_fsa.h>
29 #include <crmd_lrm.h>
30 #include <crmd_messages.h>
31 #include <throttle.h>
32 #include <crm/fencing/internal.h>
33
/* Mainloop trigger that this file sets (via mainloop_set_trigger()) when the
 * fencing daemon connection is lost or a sign-in attempt fails while fencing
 * is still required. What the trigger runs is configured outside this file.
 */
crm_trigger_t *stonith_reconnect = NULL;
35
36
37
38
39
40
41
42
43
/* List of heap-allocated (strdup'd) fencing target names that still need to
 * be marked clean; entries are added by add_stonith_cleanup() and freed by
 * the remove/purge/execute helpers below.
 */
static GListPtr stonith_cleanup_list = NULL;
45
46
47
48
49
50
51
52 void
53 add_stonith_cleanup(const char *target) {
54 stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
55 }
56
57
58
59
60
61
62
63 void
64 remove_stonith_cleanup(const char *target)
65 {
66 GListPtr iter = stonith_cleanup_list;
67
68 while (iter != NULL) {
69 GListPtr tmp = iter;
70 char *iter_name = tmp->data;
71
72 iter = iter->next;
73 if (safe_str_eq(target, iter_name)) {
74 crm_trace("Removing %s from the cleanup list", iter_name);
75 stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
76 free(iter_name);
77 }
78 }
79 }
80
81
82
83
84
85 void
86 purge_stonith_cleanup()
87 {
88 if (stonith_cleanup_list) {
89 GListPtr iter = NULL;
90
91 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
92 char *target = iter->data;
93
94 crm_info("Purging %s from stonith cleanup list", target);
95 free(target);
96 }
97 g_list_free(stonith_cleanup_list);
98 stonith_cleanup_list = NULL;
99 }
100 }
101
102
103
104
105
106 void
107 execute_stonith_cleanup()
108 {
109 GListPtr iter;
110
111 for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
112 char *target = iter->data;
113 crm_node_t *target_node = crm_get_peer(0, target);
114 const char *uuid = crm_peer_uuid(target_node);
115
116 crm_notice("Marking %s, target of a previous stonith action, as clean", target);
117 send_stonith_update(NULL, target, uuid);
118 free(target);
119 }
120 g_list_free(stonith_cleanup_list);
121 stonith_cleanup_list = NULL;
122 }
123
124
125
126 static gboolean
127 fail_incompletable_stonith(crm_graph_t * graph)
128 {
129 GListPtr lpc = NULL;
130 const char *task = NULL;
131 xmlNode *last_action = NULL;
132
133 if (graph == NULL) {
134 return FALSE;
135 }
136
137 for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
138 GListPtr lpc2 = NULL;
139 synapse_t *synapse = (synapse_t *) lpc->data;
140
141 if (synapse->confirmed) {
142 continue;
143 }
144
145 for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
146 crm_action_t *action = (crm_action_t *) lpc2->data;
147
148 if (action->type != action_type_crm || action->confirmed) {
149 continue;
150 }
151
152 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
153 if (task && safe_str_eq(task, CRM_OP_FENCE)) {
154 action->failed = TRUE;
155 last_action = action->xml;
156 update_graph(graph, action);
157 crm_notice("Failing action %d (%s): STONITHd terminated",
158 action->id, ID(action->xml));
159 }
160 }
161 }
162
163 if (last_action != NULL) {
164 crm_warn("STONITHd failure resulted in un-runnable actions");
165 abort_for_stonith_failure(tg_restart, NULL, last_action);
166 return TRUE;
167 }
168
169 return FALSE;
170 }
171
172 static void
173 tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
174 {
175 if (is_set(fsa_input_register, R_ST_REQUIRED)) {
176 crm_crit("Fencing daemon connection failed");
177 mainloop_set_trigger(stonith_reconnect);
178
179 } else {
180 crm_info("Fencing daemon disconnected");
181 }
182
183
184 if(stonith_api) {
185 stonith_api->state = stonith_disconnected;
186 }
187
188 if (AM_I_DC) {
189 fail_incompletable_stonith(transition_graph);
190 trigger_graph();
191 }
192 }
193
194 #if SUPPORT_CMAN
195 # include <libfenced.h>
196 #endif
197
/* Identifier of the form "<crm_system_name>.<pid>", built lazily on the first
 * call to tengine_stonith_notify() and compared there against the
 * client_origin of fencing events.
 */
char *te_client_id = NULL;
199
200 #ifdef HAVE_SYS_REBOOT_H
201 # include <unistd.h>
202 # include <sys/reboot.h>
203 #endif
204
205 static void
206 tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
207 {
208 if(te_client_id == NULL) {
209 te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
210 (unsigned long) getpid());
211 }
212
213 if (st_event == NULL) {
214 crm_err("Notify data not found");
215 return;
216 }
217
218 crmd_alert_fencing_op(st_event);
219
220 if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
221 crm_notice("%s was successfully unfenced by %s (at the request of %s)",
222 st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
223
224 return;
225
226 } else if (safe_str_eq("on", st_event->action)) {
227 crm_err("Unfencing of %s by %s failed: %s (%d)",
228 st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
229 pcmk_strerror(st_event->result), st_event->result);
230 return;
231
232 } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
233 crm_crit("We were allegedly just fenced by %s for %s!",
234 st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
235
236 qb_log_fini();
237
238
239
240
241
242
243
244 #ifdef RB_HALT_SYSTEM
245 reboot(RB_HALT_SYSTEM);
246 #endif
247
248
249
250
251
252
253
254
255
256
257
258
259 exit(100);
260 return;
261 }
262
263
264
265
266
267 if (!AM_I_DC && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
268 if (st_event->result == pcmk_ok) {
269 st_fail_count_reset(st_event->target);
270 } else {
271 st_fail_count_increment(st_event->target);
272 }
273 }
274
275 crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
276 CRM_XS " initiator=%s ref=%s",
277 st_event->target, st_event->result == pcmk_ok ? "" : " not",
278 st_event->action,
279 st_event->executioner ? st_event->executioner : "<anyone>",
280 (st_event->client_origin? st_event->client_origin : "<unknown>"),
281 pcmk_strerror(st_event->result),
282 st_event->origin, st_event->id);
283
284 #if SUPPORT_CMAN
285 if (st_event->result == pcmk_ok && is_cman_cluster()) {
286 int local_rc = 0;
287 int confirm = 0;
288 char *target_copy = strdup(st_event->target);
289
290
291
292
293
294 local_rc = fenced_external(target_copy);
295 if (local_rc != 0) {
296 crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
297 local_rc);
298 } else {
299 crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
300 }
301
302
303 confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
304 if (confirm >= 0) {
305 int ignore = 0;
306 int len = strlen(target_copy);
307
308 errno = 0;
309 local_rc = write(confirm, target_copy, len);
310 ignore = write(confirm, "\n", 1);
311
312 if(ignore < 0 && errno == EBADF) {
313 crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);
314
315 } else if (local_rc < len) {
316 crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);
317
318 } else {
319 fsync(confirm);
320 crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
321 }
322 close(confirm);
323 }
324 free(target_copy);
325 }
326 #endif
327
328 if (st_event->result == pcmk_ok) {
329 crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
330 const char *uuid = NULL;
331 gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
332
333 if (peer == NULL) {
334 return;
335 }
336
337 uuid = crm_peer_uuid(peer);
338
339 crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
340 if(AM_I_DC) {
341
342 send_stonith_update(NULL, st_event->target, uuid);
343
344
345
346
347
348
349
350
351
352 if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
353
354
355
356
357 crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
358 abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
359 }
360
361
362 } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
363 && !is_set(peer->flags, crm_remote_node)) {
364
365 crm_notice("Target %s our leader %s (recorded: %s)",
366 fsa_our_dc ? "was" : "may have been", st_event->target,
367 fsa_our_dc ? fsa_our_dc : "<unset>");
368
369
370
371
372
373 if (we_are_executioner) {
374 send_stonith_update(NULL, st_event->target, uuid);
375 }
376 add_stonith_cleanup(st_event->target);
377 }
378
379
380
381
382
383
384 if (is_set(peer->flags, crm_remote_node)) {
385 remote_ra_fail(st_event->target);
386 }
387
388 crmd_peer_down(peer, TRUE);
389 }
390 }
391
392 gboolean
393 te_connect_stonith(gpointer user_data)
394 {
395 int lpc = 0;
396 int rc = pcmk_ok;
397
398 if (stonith_api == NULL) {
399 stonith_api = stonith_api_new();
400 }
401
402 if (stonith_api->state != stonith_disconnected) {
403 crm_trace("Still connected");
404 return TRUE;
405 }
406
407 for (lpc = 0; lpc < 30; lpc++) {
408 crm_debug("Attempting connection to fencing daemon...");
409
410 sleep(1);
411 rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
412
413 if (rc == pcmk_ok) {
414 break;
415 }
416
417 if (user_data != NULL) {
418 if (is_set(fsa_input_register, R_ST_REQUIRED)) {
419 crm_err("Sign-in failed: triggered a retry");
420 mainloop_set_trigger(stonith_reconnect);
421 } else {
422 crm_info("Sign-in failed, but no longer required");
423 }
424 return TRUE;
425 }
426
427 crm_err("Sign-in failed: pausing and trying again in 2s...");
428 sleep(1);
429 }
430
431 CRM_CHECK(rc == pcmk_ok, return TRUE);
432 stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
433 tengine_stonith_connection_destroy);
434
435 stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
436 tengine_stonith_notify);
437
438 crm_trace("Connected");
439 return TRUE;
440 }
441
442 gboolean
443 stop_te_timer(crm_action_timer_t * timer)
444 {
445 const char *timer_desc = "action timer";
446
447 if (timer == NULL) {
448 return FALSE;
449 }
450 if (timer->reason == timeout_abort) {
451 timer_desc = "global timer";
452 crm_trace("Stopping %s", timer_desc);
453 }
454
455 if (timer->source_id != 0) {
456 crm_trace("Stopping %s", timer_desc);
457 g_source_remove(timer->source_id);
458 timer->source_id = 0;
459
460 } else {
461 crm_trace("%s was already stopped", timer_desc);
462 return FALSE;
463 }
464
465 return TRUE;
466 }
467
468 gboolean
469 te_graph_trigger(gpointer user_data)
470 {
471 enum transition_status graph_rc = -1;
472
473 if (transition_graph == NULL) {
474 crm_debug("Nothing to do");
475 return TRUE;
476 }
477
478 crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
479
480 switch (fsa_state) {
481 case S_STARTING:
482 case S_PENDING:
483 case S_NOT_DC:
484 case S_HALT:
485 case S_ILLEGAL:
486 case S_STOPPING:
487 case S_TERMINATE:
488 return TRUE;
489 break;
490 default:
491 break;
492 }
493
494 if (transition_graph->complete == FALSE) {
495 int limit = transition_graph->batch_limit;
496
497 transition_graph->batch_limit = throttle_get_total_job_limit(limit);
498 graph_rc = run_graph(transition_graph);
499 transition_graph->batch_limit = limit;
500
501
502
503
504 if (graph_rc == transition_active) {
505 crm_trace("Transition not yet complete");
506 return TRUE;
507
508 } else if (graph_rc == transition_pending) {
509 crm_trace("Transition not yet complete - no actions fired");
510 return TRUE;
511 }
512
513 if (graph_rc != transition_complete) {
514 crm_warn("Transition failed: %s", transition_status(graph_rc));
515 print_graph(LOG_NOTICE, transition_graph);
516 }
517 }
518
519 crm_debug("Transition %d is now complete", transition_graph->id);
520 transition_graph->complete = TRUE;
521 notify_crmd(transition_graph);
522
523 return TRUE;
524 }
525
526 void
527 trigger_graph_processing(const char *fn, int line)
528 {
529 crm_trace("%s:%d - Triggered graph processing", fn, line);
530 mainloop_set_trigger(transition_trigger);
531 }
532
533 void
534 abort_transition_graph(int abort_priority, enum transition_action abort_action,
535 const char *abort_text, xmlNode * reason, const char *fn, int line)
536 {
537 int add[] = { 0, 0, 0 };
538 int del[] = { 0, 0, 0 };
539 int level = LOG_INFO;
540 xmlNode *diff = NULL;
541 xmlNode *change = NULL;
542
543 CRM_CHECK(transition_graph != NULL, return);
544
545 switch (fsa_state) {
546 case S_STARTING:
547 case S_PENDING:
548 case S_NOT_DC:
549 case S_HALT:
550 case S_ILLEGAL:
551 case S_STOPPING:
552 case S_TERMINATE:
553 crm_info("Abort %s suppressed: state=%s (complete=%d)",
554 abort_text, fsa_state2string(fsa_state), transition_graph->complete);
555 return;
556 default:
557 break;
558 }
559
560
561 free(fsa_pe_ref);
562 fsa_pe_ref = NULL;
563
564 if (transition_graph->complete == FALSE) {
565 if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
566 level = LOG_NOTICE;
567 }
568 }
569
570 if(reason) {
571 xmlNode *search = NULL;
572
573 for(search = reason; search; search = search->parent) {
574 if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
575 diff = search;
576 break;
577 }
578 }
579
580 if(diff) {
581 xml_patch_versions(diff, add, del);
582 for(search = reason; search; search = search->parent) {
583 if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
584 change = search;
585 break;
586 }
587 }
588 }
589 }
590
591 if(reason == NULL) {
592 do_crm_log(level, "Transition aborted: %s "CRM_XS" source=%s:%d complete=%s",
593 abort_text, fn, line,
594 (transition_graph->complete? "true" : "false"));
595
596 } else if(change == NULL) {
597 char *local_path = xml_get_path(reason);
598
599 do_crm_log(level, "Transition aborted by %s.%s: %s "
600 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
601 TYPE(reason), ID(reason), abort_text,
602 add[0], add[1], add[2], fn, line, local_path,
603 (transition_graph->complete? "true" : "false"));
604 free(local_path);
605
606 } else {
607 const char *kind = NULL;
608 const char *op = crm_element_value(change, XML_DIFF_OP);
609 const char *path = crm_element_value(change, XML_DIFF_PATH);
610
611 if(change == reason) {
612 if(strcmp(op, "create") == 0) {
613 reason = reason->children;
614
615 } else if(strcmp(op, "modify") == 0) {
616 reason = first_named_child(reason, XML_DIFF_RESULT);
617 if(reason) {
618 reason = reason->children;
619 }
620 }
621 }
622
623 kind = TYPE(reason);
624 if(strcmp(op, "delete") == 0) {
625 const char *shortpath = strrchr(path, '/');
626
627 do_crm_log(level, "Transition aborted by deletion of %s: %s "
628 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
629 (shortpath? (shortpath + 1) : path), abort_text,
630 add[0], add[1], add[2], fn, line, path,
631 (transition_graph->complete? "true" : "false"));
632
633 } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
634 do_crm_log(level, "Transition aborted by %s doing %s %s=%s: %s "
635 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
636 crm_element_value(reason, XML_ATTR_ID), op,
637 crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
638 crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
639 abort_text, add[0], add[1], add[2], fn, line, path,
640 (transition_graph->complete? "true" : "false"));
641
642 } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
643 const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
644
645 do_crm_log(level, "Transition aborted by operation %s '%s' on %s: %s "
646 CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
647 crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
648 crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
649 magic, add[0], add[1], add[2], fn, line,
650 (transition_graph->complete? "true" : "false"));
651
652 } else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
653 || safe_str_eq(XML_CIB_TAG_NODE, kind)) {
654 const char *uname = crm_peer_uname(ID(reason));
655
656 do_crm_log(level, "Transition aborted by %s '%s' on %s: %s "
657 CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
658 kind, op, (uname? uname : ID(reason)), abort_text,
659 add[0], add[1], add[2], fn, line,
660 (transition_graph->complete? "true" : "false"));
661
662 } else {
663 do_crm_log(level, "Transition aborted by %s.%s '%s': %s "
664 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
665 TYPE(reason), ID(reason), (op? op : "change"), abort_text,
666 add[0], add[1], add[2], fn, line, path,
667 (transition_graph->complete? "true" : "false"));
668 }
669 }
670
671 if (transition_graph->complete) {
672 if (transition_timer->period_ms > 0) {
673 crm_timer_stop(transition_timer);
674 crm_timer_start(transition_timer);
675 } else {
676 register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
677 }
678 return;
679 }
680
681 mainloop_set_trigger(transition_trigger);
682 }