/*
 * Per-action history record passed by pointer to the unpack/remap helpers in
 * this file. Only two members are visible in this excerpt.
 */
30 struct action_history {
/* Text shown after the exit-code string in log messages; may be empty, in
 * which case the ": " separator is omitted by the logging call sites. */
39 const char *exit_reason;
/* Exit status the action was expected to return; remap_operation() compares
 * it with exit_status and has a separate branch for negative values.
 * NOTE(review): the exact meaning of a negative value should be confirmed
 * against the full file. */
42 int expected_exit_status;
51 #define set_config_flag(scheduler, option, flag) do { \ 52 GHashTable *config_hash = (scheduler)->config_hash; \ 53 const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ 55 if (scf_value != NULL) { \ 56 if (crm_is_true(scf_value)) { \ 57 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ 58 LOG_TRACE, "Scheduler", \ 59 crm_system_name, (scheduler)->flags, \ 62 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ 63 LOG_TRACE, "Scheduler", \ 64 crm_system_name, (scheduler)->flags, \ 71 xmlNode *xml_op, xmlNode **last_failure,
75 static void add_node_attrs(
const xmlNode *xml_obj,
pcmk_node_t *node,
77 static void determine_online_status(
const xmlNode *node_state,
81 static void unpack_node_lrm(
pcmk_node_t *node,
const xmlNode *xml,
91 if (pcmk__is_pacemaker_remote_node(node)
113 const char *reason,
bool priority_delay)
118 if (pcmk__is_guest_or_bundle_node(node)) {
124 "(otherwise would because %s): " 125 "its guest resource %s is unmanaged",
126 pcmk__node_name(node), reason, rsc->
id);
129 "(by recovering its guest resource %s): %s",
130 pcmk__node_name(node), rsc->
id, reason);
142 }
else if (is_dangling_guest_node(node)) {
143 crm_info(
"Cleaning up dangling connection for guest node %s: " 144 "fencing was already done because %s, " 145 "and guest resource no longer exists",
146 pcmk__node_name(node), reason);
150 }
else if (pcmk__is_remote_node(node)) {
155 "(otherwise would because %s): connection is unmanaged",
156 pcmk__node_name(node), reason);
160 pcmk__node_name(node),
169 crm_trace(
"Cluster node %s %s because %s",
170 pcmk__node_name(node),
176 pcmk__node_name(node),
/*
 * XPATH_UNFENCING_NVPAIR: XPath step selecting a PCMK_XE_NVPAIR element whose
 * PCMK_XA_NAME attribute equals PCMK_STONITH_PROVIDES or PCMK_META_REQUIRES,
 * and whose PCMK_XA_VALUE attribute equals PCMK_VALUE_UNFENCING.
 *
 * XPATH_ENABLE_UNFENCING: union (the | operator) of two searches for such an
 * nvpair: one as a child of a PCMK_XE_META_ATTRIBUTES element at any depth
 * below PCMK_XE_CIB/PCMK_XE_CONFIGURATION/PCMK_XE_RESOURCES, the other as a
 * child of a PCMK_XE_META_ATTRIBUTES element directly below
 * PCMK_XE_CIB/PCMK_XE_CONFIGURATION/PCMK_XE_RSC_DEFAULTS.
 *
 * NOTE(review): the trailing declaration of result belongs to code whose
 * surrounding lines are not visible in this excerpt.
 */
187 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \ 188 "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \ 189 "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \ 190 "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']" 193 #define XPATH_ENABLE_UNFENCING \ 194 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ 195 "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \ 196 "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \ 197 "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR 202 xmlXPathObjectPtr
result = NULL;
216 const char *value = NULL;
217 guint interval_ms = 0U;
239 crm_info(
"Startup probes: disabled (dangerous)");
244 crm_info(
"Watchdog-based self-fencing will be performed via SBD if " 259 if (interval_ms >= INT_MAX) {
269 crm_debug(
"STONITH of failed nodes is enabled");
271 crm_debug(
"STONITH of failed nodes is disabled");
280 "removed in a future release " 289 crm_debug(
"Concurrent fencing is enabled");
291 crm_debug(
"Concurrent fencing is disabled");
298 crm_trace(
"Priority fencing delay is %ds",
304 crm_debug(
"Stop all active resources: %s",
310 crm_debug(
"Cluster is symmetric" " - resources can run anywhere by default");
335 " to 'stop': cluster has never had quorum");
340 " to 'stop' because fencing is disabled");
350 crm_debug(
"On loss of quorum: Freeze resources");
353 crm_debug(
"On loss of quorum: Stop ALL resources");
357 "Demote promotable resources and stop other resources");
360 crm_notice(
"On loss of quorum: Fence all remaining nodes");
370 crm_trace(
"Orphan resources are stopped");
372 crm_trace(
"Orphan resources are ignored");
378 crm_trace(
"Orphan resource actions are stopped");
380 crm_trace(
"Orphan resource actions are ignored");
389 " cluster property is deprecated and will be " 390 "removed in a future release");
405 crm_trace(
"Start failures are always fatal");
407 crm_trace(
"Start failures are handled by failcount");
415 crm_trace(
"Unseen nodes will be fenced");
418 "Blind faith: not fencing unseen nodes");
433 crm_trace(
"Resources will be locked to nodes that were cleanly " 434 "shut down (locks expire after %s)",
437 crm_trace(
"Resources will not be locked to nodes that were cleanly " 447 crm_trace(
"Fence pending nodes after %s",
480 if (new_node == NULL) {
488 if (new_node->
details == NULL) {
522 "(such as %s) is deprecated and will be removed in a " 524 pcmk__s(
uname,
"unnamed node"));
530 if (pcmk__is_pacemaker_remote_node(new_node)) {
548 xmlNode *attr_set = NULL;
549 xmlNode *attr = NULL;
551 const char *container_id = pcmk__xe_id(xml_obj);
552 const char *remote_name = NULL;
553 const char *remote_server = NULL;
554 const char *remote_port = NULL;
555 const char *connect_timeout =
"60s";
556 const char *remote_allow_migrate=NULL;
557 const char *is_managed = NULL;
560 attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
567 attr != NULL; attr = pcmk__xe_next(attr)) {
580 remote_server = value;
586 connect_timeout = value;
589 remote_allow_migrate = value;
597 if (remote_name == NULL) {
606 remote_allow_migrate, is_managed,
607 connect_timeout, remote_server, remote_port);
640 xmlNode *xml_obj = NULL;
642 const char *
id = NULL;
643 const char *
uname = NULL;
644 const char *
type = NULL;
647 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
662 "> entry in configuration without id");
668 "because '%s' is not a valid score: %s",
669 pcmk__s(
uname,
"without name"),
675 if (new_node == NULL) {
679 handle_startup_fencing(
scheduler, new_node);
681 add_node_attrs(xml_obj, new_node, FALSE,
scheduler);
690 crm_info(
"Creating a fake local node");
701 const char *container_id = NULL;
718 rsc->
id, container_id);
721 rsc->
id, container_id);
729 xmlNode *xml_obj = NULL;
735 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
737 const char *new_node_id = NULL;
743 new_node_id = pcmk__xe_id(xml_obj);
749 crm_trace(
"Found remote node %s defined by resource %s",
750 new_node_id, pcmk__xe_id(xml_obj));
765 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
769 crm_trace(
"Found guest node %s in resource %s",
770 new_node_id, pcmk__xe_id(xml_obj));
781 xmlNode *xml_obj2 = NULL;
783 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
785 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
790 crm_trace(
"Found guest node %s in resource %s inside group %s",
791 new_node_id, pcmk__xe_id(xml_obj2),
792 pcmk__xe_id(xml_obj));
827 pcmk__rsc_trace(new_rsc,
"Linking remote connection resource %s to %s",
828 new_rsc->
id, pcmk__node_name(remote_node));
835 handle_startup_fencing(
scheduler, remote_node);
/*
 * Destructor-style callback that receives its object as an untyped gpointer.
 *
 * The only statement visible here releases the refs list of the tag with
 * g_list_free_full(), which calls free() on the data of every list element
 * before freeing the list itself.
 *
 * NOTE(review): the lines that derive tag from data, and any release of the
 * tag object itself, are not visible in this excerpt -- confirm against the
 * full file.
 */
847 destroy_tag(gpointer
data)
853 g_list_free_full(tag->
refs, free);
873 xmlNode *xml_obj = NULL;
879 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
882 const char *
id = pcmk__xe_id(xml_obj);
884 if (pcmk__str_empty(
id)) {
892 NULL, NULL) == FALSE) {
907 "because configuration is invalid",
927 pcmk__config_err(
"Resource start-up disabled since no STONITH resources have been defined");
930 pcmk__config_err(
"NOTE: Clusters with shared data need STONITH to ensure data integrity");
948 xmlNode *xml_obj = NULL;
957 if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
975 xmlNode *xml_tag = NULL;
980 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
982 xmlNode *xml_obj_ref = NULL;
983 const char *tag_id = pcmk__xe_id(xml_tag);
989 if (tag_id == NULL) {
991 (
const char *) xml_tag->name);
996 xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
998 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
1004 if (obj_ref == NULL) {
1006 xml_obj_ref->name, tag_id);
1024 const char *ticket_id = NULL;
1025 const char *granted = NULL;
1026 const char *last_granted = NULL;
1027 const char *standby = NULL;
1028 xmlAttrPtr xIter = NULL;
1032 ticket_id = pcmk__xe_id(xml_ticket);
1033 if (pcmk__str_empty(ticket_id)) {
1037 crm_trace(
"Processing ticket state for %s", ticket_id);
1040 if (ticket == NULL) {
1042 if (ticket == NULL) {
1047 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1048 const char *prop_name = (
const char *)xIter->name;
1049 const char *prop_value = pcmk__xml_attr_value(xIter);
1063 crm_info(
"We do not have ticket '%s'", ticket->
id);
1068 long long last_granted_ll = 0LL;
1069 int rc =
pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1073 " value '%s' in state for ticket %s: %s",
1074 last_granted_ll, last_granted, ticket->
id,
1084 crm_info(
"Granted ticket '%s' is in standby-mode", ticket->
id);
1090 crm_trace(
"Done with ticket state for %s", ticket_id);
1098 xmlNode *xml_obj = NULL;
1101 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1106 unpack_ticket_state(xml_obj,
scheduler);
1113 unpack_handle_remote_attrs(
pcmk_node_t *this_node,
const xmlNode *state,
1116 const char *discovery = NULL;
1117 const xmlNode *attrs = NULL;
1124 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1127 crm_trace(
"Processing Pacemaker Remote node %s",
1128 pcmk__node_name(this_node));
1140 add_node_attrs(attrs, this_node, TRUE,
scheduler);
1143 crm_info(
"%s is shutting down", pcmk__node_name(this_node));
1149 crm_info(
"%s is in standby mode", pcmk__node_name(this_node));
1156 crm_info(
"%s is in maintenance mode", pcmk__node_name(this_node));
1163 if ((discovery != NULL) && !
crm_is_true(discovery)) {
1167 " node attribute is deprecated and will be removed" 1168 " (and behave as 'true') in a future release.");
1170 if (pcmk__is_remote_node(this_node)
1174 " attribute on Pacemaker Remote node %s" 1175 " because fencing is disabled",
1176 pcmk__node_name(this_node));
1183 crm_info(
"%s has resource discovery disabled",
1184 pcmk__node_name(this_node));
1199 unpack_transient_attributes(
const xmlNode *state,
pcmk_node_t *node,
1202 const char *discovery = NULL;
1207 add_node_attrs(attrs, node, TRUE,
scheduler);
1211 crm_info(
"%s is in standby mode", pcmk__node_name(node));
1217 crm_info(
"%s is in maintenance mode", pcmk__node_name(node));
1224 if ((discovery != NULL) && !
crm_is_true(discovery)) {
1227 " attribute for %s because disabling resource" 1228 " discovery is not allowed for cluster nodes",
1229 pcmk__node_name(node));
1248 const char *
id = NULL;
1249 const char *
uname = NULL;
1261 if (
uname == NULL) {
1274 if (this_node == NULL) {
1275 crm_notice(
"Ignoring recorded state for removed node with name %s and " 1280 if (pcmk__is_pacemaker_remote_node(this_node)) {
1291 unpack_transient_attributes(state, this_node,
scheduler);
1299 crm_trace(
"Determining online status of cluster node %s (id %s)",
1300 pcmk__node_name(this_node),
id);
1301 determine_online_status(state, this_node,
scheduler);
1333 unpack_node_history(
const xmlNode *status,
bool fence,
1344 const char *
id = pcmk__xe_id(state);
1348 if ((
id == NULL) || (
uname == NULL)) {
1350 crm_trace(
"Not unpacking resource history from malformed " 1356 if (this_node == NULL) {
1358 crm_trace(
"Not unpacking resource history for node %s because " 1359 "no longer in configuration",
id);
1364 crm_trace(
"Not unpacking resource history for node %s because " 1365 "already unpacked",
id);
1372 }
else if (pcmk__is_guest_or_bundle_node(this_node)) {
1381 crm_trace(
"Not unpacking resource history for guest node %s " 1382 "because container and connection are not known to " 1387 }
else if (pcmk__is_remote_node(this_node)) {
1398 crm_trace(
"Not unpacking resource history for remote node %s " 1399 "because connection is not known to be up",
id);
1412 crm_trace(
"Not unpacking resource history for offline " 1413 "cluster node %s",
id);
1417 if (pcmk__is_pacemaker_remote_node(this_node)) {
1418 determine_remote_online_status(
scheduler, this_node);
1419 unpack_handle_remote_attrs(this_node, state,
scheduler);
1422 crm_trace(
"Unpacking resource history for %snode %s",
1423 (fence?
"unseen " :
""),
id);
1426 unpack_node_lrm(this_node, state,
scheduler);
1439 xmlNode *state = NULL;
1448 state = pcmk__xe_next(state)) {
1451 unpack_tickets_state((xmlNode *) state,
scheduler);
1458 while (unpack_node_history(status, FALSE,
scheduler) == EAGAIN) {
1459 crm_trace(
"Another pass through node resource histories is needed");
1463 unpack_node_history(status,
1474 pcmk_node_t *node = pcmk__current_node(container);
1488 for (GList *gIter =
scheduler->
nodes; gIter != NULL; gIter = gIter->next) {
1491 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1500 determine_remote_online_status(
scheduler, this_node);
1524 if (member_time == NULL) {
1540 long long when_member = 0LL;
1543 0LL) !=
pcmk_rc_ok) || (when_member < 0LL)) {
1562 unpack_node_online(
const xmlNode *node_state)
1575 long long when_online = 0LL;
1578 || (when_online < 0)) {
1597 unpack_node_terminate(
const pcmk_node_t *node,
const xmlNode *node_state)
1599 long long value = 0LL;
1607 return (value_i != 0);
1614 "node attribute for %s: %s",
1621 const xmlNode *node_state,
1624 gboolean online = FALSE;
1627 long long when_member = unpack_node_member(node_state,
scheduler);
1628 long long when_online = unpack_node_online(node_state);
1630 if (when_member <= 0) {
1631 crm_trace(
"Node %s is %sdown", pcmk__node_name(this_node),
1632 ((when_member < 0)?
"presumed " :
""));
1634 }
else if (when_online > 0) {
1638 crm_debug(
"Node %s is not ready to run resources: %s",
1639 pcmk__node_name(this_node), join);
1643 crm_trace(
"Node %s controller is down: " 1644 "member@%lld online@%lld join=%s expected=%s",
1645 pcmk__node_name(this_node), when_member, when_online,
1646 pcmk__s(join,
"<null>"), pcmk__s(exp_state,
"<null>"));
1651 crm_info(
"Node %s member@%lld online@%lld join=%s expected=%s",
1652 pcmk__node_name(this_node), when_member, when_online,
1653 pcmk__s(join,
"<null>"), pcmk__s(exp_state,
"<null>"));
1673 long long when_member,
long long when_online)
1676 && (when_member > 0) && (when_online <= 0)) {
1693 const xmlNode *node_state,
1696 bool termination_requested = unpack_node_terminate(this_node, node_state);
1699 long long when_member = unpack_node_member(node_state,
scheduler);
1700 long long when_online = unpack_node_online(node_state);
1720 crm_trace(
"Node %s member@%lld online@%lld join=%s expected=%s%s",
1721 pcmk__node_name(this_node), when_member, when_online,
1722 pcmk__s(join,
"<null>"), pcmk__s(exp_state,
"<null>"),
1723 (termination_requested?
" (termination requested)" :
""));
1726 crm_debug(
"%s is shutting down", pcmk__node_name(this_node));
1729 return (when_online > 0);
1732 if (when_member < 0) {
1734 "peer has not been seen by the cluster", FALSE);
1740 "peer failed Pacemaker membership criteria", FALSE);
1742 }
else if (termination_requested) {
1743 if ((when_member <= 0) && (when_online <= 0)
1745 crm_info(
"%s was fenced as requested", pcmk__node_name(this_node));
1753 if (pending_too_long(
scheduler, this_node, when_member, when_online)) {
1755 "peer pending timed out on joining the process group",
1758 }
else if ((when_member > 0) || (when_online > 0)) {
1759 crm_info(
"- %s is not ready to run resources",
1760 pcmk__node_name(this_node));
1765 crm_trace(
"%s is down or still coming up",
1766 pcmk__node_name(this_node));
1769 }
else if (when_member <= 0) {
1772 "peer is no longer part of the cluster", TRUE);
1774 }
else if (when_online <= 0) {
1776 "peer process is no longer available", FALSE);
1781 crm_info(
"%s is active", pcmk__node_name(this_node));
1785 crm_info(
"%s is not ready to run resources",
1786 pcmk__node_name(this_node));
1795 return (when_member > 0);
1812 goto remote_online_done;
1817 if (container && pcmk__list_of_1(rsc->
running_on)) {
1823 crm_trace(
"%s node %s presumed ONLINE because connection resource is started",
1824 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1832 crm_trace(
"%s node %s shutting down because connection resource is stopping",
1833 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1839 crm_trace(
"Guest node %s UNCLEAN because guest resource failed",
1845 crm_trace(
"%s node %s OFFLINE because connection resource failed",
1846 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1850 || ((container != NULL)
1853 crm_trace(
"%s node %s OFFLINE because its resource is stopped",
1854 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1858 }
else if (
host && (
host->details->online == FALSE)
1859 &&
host->details->unclean) {
1860 crm_trace(
"Guest node %s UNCLEAN because host is unclean",
1872 determine_online_status(
const xmlNode *node_state,
pcmk_node_t *this_node,
1875 gboolean online = FALSE;
1898 online = determine_online_status_no_fencing(
scheduler, node_state,
1902 online = determine_online_status_fencing(
scheduler, node_state,
1911 this_node->
fixed = TRUE;
1917 this_node->
fixed = TRUE;
1922 crm_info(
"%s is not a Pacemaker node", pcmk__node_name(this_node));
1928 crm_info(
"%s is %s", pcmk__node_name(this_node),
1935 crm_trace(
"%s is offline", pcmk__node_name(this_node));
1950 if (!pcmk__str_empty(
id)) {
1951 const char *end =
id + strlen(
id) - 1;
1953 for (
const char *s = end; s >
id; --s) {
1967 return (s == end)? s : (s - 1);
1991 char *basename = NULL;
1994 basename = strndup(last_rsc_id, end - last_rsc_id + 1);
2013 size_t base_name_len = end - last_rsc_id + 1;
2018 memcpy(zero, last_rsc_id, base_name_len);
2019 zero[base_name_len] =
':';
2020 zero[base_name_len + 1] =
'0';
2025 create_fake_resource(
const char *rsc_id,
const xmlNode *rsc_entry,
2042 crm_debug(
"Detected orphaned remote node %s", rsc_id);
2051 crm_trace(
"Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2058 crm_trace(
"Detected orphaned container filler %s", rsc_id);
2088 top->
id,
parent->id, rsc_id, pcmk__node_name(node));
2111 GList *rIter = NULL;
2114 gboolean skip_inactive = FALSE;
2120 rsc_id, pcmk__node_name(node),
parent->id);
2121 for (rIter =
parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2122 GList *locations = NULL;
2148 if (pcmk__same_node((
pcmk_node_t *) locations->data, node)) {
2156 rsc =
parent->fns->find_rsc(child, rsc_id, NULL,
2167 crm_notice(
"Active (now-)anonymous clone %s has " 2168 "multiple (orphan) instance histories on %s",
2169 parent->id, pcmk__node_name(node));
2170 skip_inactive = TRUE;
2177 g_list_free(locations);
2181 if (!skip_inactive && !inactive_instance
2184 inactive_instance =
parent->fns->find_rsc(child, rsc_id, NULL,
2190 if ((inactive_instance != NULL) &&
2192 !pcmk__same_node(inactive_instance->
pending_node, node)) {
2193 inactive_instance = NULL;
2199 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2201 inactive_instance->
id);
2202 rsc = inactive_instance;
2220 && !pcmk__is_guest_or_bundle_node(node)
2257 crm_trace(
"%s is not known as %s either (orphan)",
2263 crm_trace(
"Resource history for %s is orphaned because it is no longer primitive",
2271 if (pcmk__is_anonymous_clone(
parent)) {
2273 if (pcmk__is_bundled(
parent)) {
2289 rsc_id, pcmk__node_name(node), rsc->
id,
2296 process_orphan_resource(
const xmlNode *rsc_entry,
const pcmk_node_t *node,
2302 crm_debug(
"Detected orphan resource %s on %s",
2303 rsc_id, pcmk__node_name(node));
2304 rsc = create_fake_resource(rsc_id, rsc_entry,
scheduler);
2326 char *reason = NULL;
2344 ((rsc->
clone_name == NULL)?
"" :
" also known as "),
2346 pcmk__node_name(n));
2362 gboolean should_fence = FALSE;
2371 if (pcmk__is_guest_or_bundle_node(node)) {
2373 should_fence = TRUE;
2377 if (pcmk__is_remote_node(node)
2390 " revoked if remote connection can " 2391 "be re-established elsewhere)",
2394 should_fence = TRUE;
2398 if (reason == NULL) {
2407 save_on_fail = on_fail;
2453 "__action_migration_auto__", rsc->
cluster);
2472 if ((rsc->
container != NULL) && pcmk__is_bundled(rsc)) {
2495 if (pcmk__is_remote_node(tmpnode)
2501 "remote connection is unrecoverable", FALSE);
2533 crm_notice(
"Removed resource %s is active on %s and will be " 2534 "stopped when possible",
2535 rsc->
id, pcmk__node_name(node));
2537 crm_notice(
"Removed resource %s must be stopped manually on %s " 2539 " is set to false", rsc->
id, pcmk__node_name(node));
2570 GList *gIter = possible_matches;
2572 for (; gIter != NULL; gIter = gIter->next) {
2578 g_list_free(possible_matches);
2597 int start_index,
int stop_index,
2601 const char *task = NULL;
2602 const char *status = NULL;
2603 GList *gIter = sorted_op_list;
2607 rsc->
id, start_index, stop_index);
2609 for (; gIter != NULL; gIter = gIter->next) {
2610 xmlNode *rsc_op = (xmlNode *) gIter->data;
2612 guint interval_ms = 0;
2614 const char *
id = pcmk__xe_id(rsc_op);
2620 rsc->
id, pcmk__node_name(node));
2624 }
else if (start_index < stop_index && counter <= stop_index) {
2626 id, pcmk__node_name(node));
2629 }
else if (counter < start_index) {
2631 id, pcmk__node_name(node), counter);
2636 if (interval_ms == 0) {
2638 id, pcmk__node_name(node));
2645 id, pcmk__node_name(node));
2651 pcmk__rsc_trace(rsc,
"Creating %s on %s", key, pcmk__node_name(node));
2661 int implied_monitor_start = -1;
2662 int implied_clone_start = -1;
2663 const char *task = NULL;
2664 const char *status = NULL;
2669 for (
const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2670 const xmlNode *rsc_op = (
const xmlNode *) iter->data;
2679 *stop_index = counter;
2683 *start_index = counter;
2685 }
else if ((implied_monitor_start <= *stop_index)
2691 implied_monitor_start = counter;
2695 implied_clone_start = counter;
2699 if (*start_index == -1) {
2700 if (implied_clone_start != -1) {
2701 *start_index = implied_clone_start;
2702 }
else if (implied_monitor_start != -1) {
2703 *start_index = implied_monitor_start;
2713 time_t lock_time = 0;
2716 &lock_time) ==
pcmk_ok) && (lock_time != 0)) {
2722 rsc->
id, pcmk__node_name(node));
2746 unpack_lrm_resource(
pcmk_node_t *node,
const xmlNode *lrm_resource,
2749 GList *gIter = NULL;
2750 int stop_index = -1;
2751 int start_index = -1;
2754 const char *rsc_id = pcmk__xe_id(lrm_resource);
2757 GList *op_list = NULL;
2758 GList *sorted_op_list = NULL;
2760 xmlNode *rsc_op = NULL;
2761 xmlNode *last_failure = NULL;
2766 if (rsc_id == NULL) {
2773 rsc_id, pcmk__node_name(node));
2782 op_list = g_list_prepend(op_list, rsc_op);
2786 if (op_list == NULL) {
2793 rsc = unpack_find_resource(
scheduler, node, rsc_id);
2795 if (op_list == NULL) {
2799 rsc = process_orphan_resource(lrm_resource, node,
scheduler);
2806 unpack_shutdown_lock(lrm_resource, rsc, node,
scheduler);
2810 saved_role = rsc->
role;
2814 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2815 xmlNode *rsc_op = (xmlNode *) gIter->data;
2817 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2822 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2826 g_list_free(sorted_op_list);
2828 process_rsc_state(rsc, node, on_fail);
2832 || (req_role < rsc->next_role)) {
2838 "%s: Not overwriting calculated next role %s" 2839 " with requested next role %s",
2845 if (saved_role > rsc->
role) {
2846 rsc->
role = saved_role;
2853 handle_orphaned_container_fillers(
const xmlNode *lrm_rsc_list,
2858 rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2863 const char *container_id;
2871 if (container_id == NULL || rsc_id == NULL) {
2876 if (container == NULL) {
2881 if ((rsc == NULL) || (rsc->
container != NULL)
2886 pcmk__rsc_trace(rsc,
"Mapped container of orphaned resource %s to %s",
2887 rsc->
id, container_id);
2902 unpack_node_lrm(
pcmk_node_t *node,
const xmlNode *xml,
2905 bool found_orphaned_container_filler =
false;
2927 found_orphaned_container_filler =
true;
2934 if (found_orphaned_container_filler) {
2935 handle_orphaned_container_fillers(xml,
scheduler);
/*
 * Hash-table iteration callback (key, value, user_data signature).
 *
 * user_data is interpreted as a pointer to an int score. Elsewhere in this
 * file the function is handed to g_hash_table_foreach() over the
 * allowed_nodes table of a resource, with the address of a local score.
 *
 * NOTE(review): the statement that applies the score to the table entry is
 * not visible in this excerpt; presumably it assigns the score to the node
 * stored in value -- confirm.
 */
2952 set_node_score(gpointer key, gpointer value, gpointer user_data)
2955 int *score = user_data;
/*
 * XPath building blocks used by the history lookups below:
 *   XPATH_NODE_STATE       - absolute path PCMK_XE_CIB/PCMK_XE_STATUS/
 *                            PCMK__XE_NODE_STATE
 *   SUB_XPATH_LRM_RESOURCE - relative path PCMK__XE_LRM/PCMK__XE_LRM_RESOURCES/
 *                            PCMK__XE_LRM_RESOURCE
 *   SUB_XPATH_LRM_RSC_OP   - relative path step PCMK__XE_LRM_RSC_OP
 *
 * find_lrm_op: look up a recorded operation entry by building an XPath
 * expression from the given resource, op and node (and, in one branch,
 * source).
 *
 * resource, op and node must all be non-NULL (enforced with CRM_CHECK);
 * source may be NULL. The expression is assembled in a temporary GString that
 * is freed before the result is examined.
 *
 * NOTE(review): the search call itself and the handling of target_rc are only
 * partly visible in this excerpt.
 */
2960 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ 2961 "/" PCMK__XE_NODE_STATE 2962 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \ 2963 "/" PCMK__XE_LRM_RESOURCES \ 2964 "/" PCMK__XE_LRM_RESOURCE 2965 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP 2968 find_lrm_op(
const char *resource,
const char *op,
const char *node,
const char *source,
2971 GString *xpath = NULL;
2972 xmlNode *xml = NULL;
/* Reject missing mandatory arguments before building anything */
2974 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
/* 256 is only the initial allocation; a GString grows on demand */
2977 xpath = g_string_sized_new(256);
2990 }
else if ((source != NULL)
2996 g_string_append_c(xpath,
']');
/* TRUE also releases the character data held by the GString */
3001 g_string_free(xpath, TRUE);
/* Extra filtering applies only when a match was found and target_rc is
 * non-negative */
3003 if (xml && target_rc >= 0) {
/*
 * Look up the recorded history element for a resource on a node by building
 * an XPath expression from rsc_id and node_name.
 *
 * Returns NULL (via CRM_CHECK) when either rsc_id or node_name is NULL. The
 * expression is assembled in a temporary GString that is freed before the
 * function finishes.
 *
 * NOTE(review): the lines that compose and evaluate the expression are not
 * visible in this excerpt.
 */
3017 find_lrm_resource(
const char *rsc_id,
const char *node_name,
3020 GString *xpath = NULL;
3021 xmlNode *xml = NULL;
3023 CRM_CHECK((rsc_id != NULL) && (node_name != NULL),
return NULL);
/* 256 is only the initial allocation; a GString grows on demand */
3025 xpath = g_string_sized_new(256);
/* TRUE also releases the character data held by the GString */
3034 g_string_free(xpath, TRUE);
3051 xmlXPathObjectPtr search;
3061 result = (numXpathResults(search) == 0);
3080 monitor_not_running_after(
const char *rsc_id,
const char *node_name,
3081 const xmlNode *xml_op,
bool same_node,
3106 non_monitor_after(
const char *rsc_id,
const char *node_name,
3107 const xmlNode *xml_op,
bool same_node,
3110 xmlNode *lrm_resource = NULL;
3112 lrm_resource = find_lrm_resource(rsc_id, node_name,
scheduler);
3113 if (lrm_resource == NULL) {
3121 const char * task = NULL;
3153 newer_state_after_migrate(
const char *rsc_id,
const char *node_name,
3154 const xmlNode *migrate_to,
3155 const xmlNode *migrate_from,
3158 const xmlNode *xml_op = migrate_to;
3159 const char *source = NULL;
3160 const char *
target = NULL;
3161 bool same_node =
false;
3164 xml_op = migrate_from;
3175 xml_op = migrate_from;
3179 xml_op = migrate_to;
3184 xml_op = migrate_to;
3188 xml_op = migrate_from;
3196 return non_monitor_after(rsc_id, node_name, xml_op, same_node,
scheduler)
3197 || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
/*
 * Obtain the migration source and target node names for a history entry and
 * hand them back through source_name and target_name.
 *
 * A missing source or target name is handled as a separate (error) branch.
 * When source_node or target_node is supplied (callers in this file pass NULL
 * for whichever side they do not know), the corresponding name is compared
 * with the uname of that node, and a mismatch is reported together with the
 * entry ID, the recorded name and the node name.
 *
 * NOTE(review): how the names are read from entry, and the return values of
 * each branch, are not visible in this excerpt.
 */
3214 get_migration_node_names(
const xmlNode *entry,
const pcmk_node_t *source_node,
3216 const char **source_name,
const char **target_name)
/* Both names are required */
3220 if ((*source_name == NULL) || (*target_name == NULL)) {
/* Cross-check the recorded source name against the known source node */
3227 if ((source_node != NULL)
3228 && !pcmk__str_eq(*source_name, source_node->
details->
uname,
3232 pcmk__xe_id(entry), *source_name,
3233 pcmk__node_name(source_node));
/* Cross-check the recorded target name against the known target node */
3237 if ((target_node != NULL)
3238 && !pcmk__str_eq(*target_name, target_node->
details->
uname,
3242 pcmk__xe_id(entry), *target_name,
3243 pcmk__node_name(target_node));
3266 rsc->
id, pcmk__node_name(node));
3279 unpack_migrate_to_success(
struct action_history *history)
3315 xmlNode *migrate_from = NULL;
3316 const char *source = NULL;
3317 const char *
target = NULL;
3318 bool source_newer_op =
false;
3319 bool target_newer_state =
false;
3320 bool active_on_target =
false;
3323 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3329 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3330 true, history->rsc->cluster);
3334 target, source, -1, history->rsc->cluster);
3335 if (migrate_from != NULL) {
3336 if (source_newer_op) {
3350 target_newer_state = newer_state_after_migrate(history->rsc->id,
target,
3351 history->xml, migrate_from,
3352 history->rsc->cluster);
3353 if (source_newer_op && target_newer_state) {
3362 add_dangling_migration(history->rsc, history->node);
3372 active_on_target = !target_newer_state && (target_node != NULL)
3376 if (active_on_target) {
3395 && unknown_on_node(history->rsc,
target)) {
3399 if (active_on_target) {
3405 if ((source_node != NULL) && source_node->
details->
online) {
3412 history->rsc->partial_migration_target = target_node;
3413 history->rsc->partial_migration_source = source_node;
3416 }
else if (!source_newer_op) {
3431 unpack_migrate_to_failure(
struct action_history *history)
3433 xmlNode *target_migrate_from = NULL;
3434 const char *source = NULL;
3435 const char *
target = NULL;
3438 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3449 target_migrate_from = find_lrm_op(history->rsc->id,
3458 !unknown_on_node(history->rsc,
target)
3462 && !newer_state_after_migrate(history->rsc->id,
target, history->xml,
3463 target_migrate_from,
3464 history->rsc->cluster)) {
3477 }
else if (!non_monitor_after(history->rsc->id, source, history->xml,
true,
3478 history->rsc->cluster)) {
3485 history->rsc->dangling_migrations =
3486 g_list_prepend(history->rsc->dangling_migrations,
3487 (gpointer) history->node);
3498 unpack_migrate_from_failure(
struct action_history *history)
3500 xmlNode *source_migrate_to = NULL;
3501 const char *source = NULL;
3502 const char *
target = NULL;
3505 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3518 history->rsc->cluster);
3525 !unknown_on_node(history->rsc, source)
3529 && !newer_state_after_migrate(history->rsc->id, source,
3530 source_migrate_to, history->xml,
3531 history->rsc->cluster)) {
/*
 * Add the action described by history to the list of failed actions kept by
 * the scheduler (history->rsc->cluster->failed), unless an equivalent entry
 * is already present.
 *
 * The node being offline is checked first as a separate branch. Existing
 * children of the failed list are then scanned; an entry is treated as a
 * duplicate based on its history key and the node uname.
 *
 * NOTE(review): what the offline branch does, the full duplicate condition
 * and the code that appends the new entry are not visible in this excerpt.
 */
3552 record_failed_op(
struct action_history *history)
3554 if (!(history->node->details->online)) {
/* Scan entries already recorded as failed */
3558 for (
const xmlNode *xIter = history->rsc->cluster->failed->children;
3559 xIter != NULL; xIter = xIter->next) {
3561 const char *key = pcmk__xe_history_key(xIter);
3565 && pcmk__str_eq(
uname, history->node->details->uname,
3567 crm_trace(
"Skipping duplicate entry %s on %s",
3568 history->key, pcmk__node_name(history->node));
/* No duplicate found: record this failure */
3573 crm_trace(
"Adding entry for %s on %s to failed action list",
3574 history->key, pcmk__node_name(history->node));
/*
 * Produce a string describing when the given history entry last changed.
 *
 * Callers in this file release the returned pointer with free().
 *
 * NOTE(review): how when_s is obtained from xml_op, and what is returned when
 * no timestamp is available, are not visible in this excerpt.
 */
3581 last_change_str(
const xmlNode *xml_op)
/* Locate the first space in the timestamp text */
3589 const char *p = strchr(when_s,
' ');
/* Step past that space; the branch is taken only if text follows it */
3592 if ((p != NULL) && (*(++p) !=
'\0')) {
3692 return first - second;
3707 if (fail_rsc->
parent != NULL) {
3710 if (pcmk__is_anonymous_clone(
parent)) {
3720 crm_notice(
"%s will not be started under current conditions", fail_rsc->
id);
3725 g_hash_table_foreach(fail_rsc->
allowed_nodes, set_node_score, &score);
3737 unpack_failure_handling(
struct action_history *history,
3742 history->interval_ms,
true);
3746 history->interval_ms, config);
3751 history->interval_ms, on_fail_str);
3754 g_hash_table_destroy(meta);
3768 unpack_rsc_op_failure(
struct action_history *history,
3770 enum rsc_role_e fail_role, xmlNode **last_failure,
3773 bool is_probe =
false;
3774 char *last_change_s = NULL;
3776 *last_failure = history->xml;
3779 last_change_s = last_change_str(history->xml);
3783 crm_trace(
"Unexpected result (%s%s%s) was recorded for " 3784 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3785 services_ocf_exitcode_str(history->exit_status),
3786 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
3787 pcmk__s(history->exit_reason,
""),
3788 (is_probe?
"probe" : history->task), history->rsc->id,
3789 pcmk__node_name(history->node), last_change_s,
3790 history->exit_status, history->id);
3793 "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3794 services_ocf_exitcode_str(history->exit_status),
3795 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
3796 pcmk__s(history->exit_reason,
""),
3797 (is_probe?
"probe" : history->task), history->rsc->id,
3798 pcmk__node_name(history->node), last_change_s,
3799 history->exit_status, history->id);
3801 if (is_probe && (history->exit_status !=
PCMK_OCF_OK)
3808 crm_notice(
"If it is not possible for %s to run on %s, see " 3811 history->rsc->id, pcmk__node_name(history->node));
3814 record_failed_op(history);
3817 free(last_change_s);
3819 if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3823 *on_fail = config_on_fail;
3828 "__stop_fail__", history->rsc->cluster);
3831 unpack_migrate_to_failure(history);
3834 unpack_migrate_from_failure(history);
3860 pcmk__rsc_trace(history->rsc,
"Leaving %s stopped", history->rsc->id);
3865 set_active(history->rsc);
3869 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3871 pcmk__btoa(history->node->details->unclean),
3876 && (history->rsc->next_role < fail_role)) {
3881 ban_from_all_nodes(history->rsc);
/*
 * Handle a failed action whose node may or may not be fenceable.
 *
 * The outcome of pe_can_fence() for the node is tested first. On the path
 * that continues, a message is composed naming the resource, task, node,
 * exit-code string, optional exit reason (preceded by ": " only when
 * non-empty), last-change time, raw exit status and history ID.
 *
 * NOTE(review): what happens inside the pe_can_fence() branch, the leading
 * part of the message and any resource flag changes are not visible in this
 * excerpt.
 */
3895 block_if_unrecoverable(
struct action_history *history)
3897 char *last_change_s = NULL;
3902 if (
pe_can_fence(history->node->details->data_set, history->node)) {
/* Allocated timestamp text, released at the end of the visible code */
3906 last_change_s = last_change_str(history->xml);
3908 "because %s on %s failed (%s%s%s) at %s " 3910 history->rsc->id, history->task,
3911 pcmk__node_name(history->node),
3912 services_ocf_exitcode_str(history->exit_status),
3913 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
3914 pcmk__s(history->exit_reason,
""),
3915 last_change_s, history->exit_status, history->id);
3917 free(last_change_s);
/*
 * Change the execution status stored in history to value, but only when it
 * actually differs from the current one.
 *
 * NOTE(review): the remaining parameter(s) and the use made of why (it
 * presumably receives the reason text for the remap) are not visible in this
 * excerpt -- confirm against the full file.
 */
3933 remap_because(
struct action_history *history,
const char **why,
int value,
/* Nothing to do when the status is already the requested value */
3936 if (history->execution_status != value) {
3937 history->execution_status = value;
3965 remap_operation(
struct action_history *history,
3968 bool is_probe =
false;
3969 int orig_exit_status = history->exit_status;
3970 int orig_exec_status = history->execution_status;
3971 const char *why = NULL;
3972 const char *task = history->task;
3976 if (history->exit_status != orig_exit_status) {
3977 why =
"degraded result";
3978 if (!expired && (!history->node->details->shutdown
3979 || history->node->details->online)) {
3980 record_failed_op(history);
3984 if (!pcmk__is_bundled(history->rsc)
3990 why =
"equivalent probe result";
3998 switch (history->execution_status) {
4007 "node-fatal error");
4019 if (history->expected_exit_status < 0) {
4029 "obsolete history format");
4031 "(corrupt or obsolete CIB?)",
4032 history->key, pcmk__node_name(history->node));
4034 }
else if (history->exit_status == history->expected_exit_status) {
4040 "%s on %s: expected %d (%s), got %d (%s%s%s)",
4041 history->key, pcmk__node_name(history->node),
4042 history->expected_exit_status,
4043 services_ocf_exitcode_str(history->expected_exit_status),
4044 history->exit_status,
4045 services_ocf_exitcode_str(history->exit_status),
4046 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
4047 pcmk__s(history->exit_reason,
""));
4050 switch (history->exit_status) {
4054 char *last_change_s = last_change_str(history->xml);
4058 "Probe found %s active on %s at %s",
4059 history->rsc->id, pcmk__node_name(history->node),
4061 free(last_change_s);
4067 || (history->expected_exit_status == history->exit_status)
4084 && (history->exit_status != history->expected_exit_status)) {
4085 char *last_change_s = last_change_str(history->xml);
4089 "Probe found %s active and promoted on %s at %s",
4091 pcmk__node_name(history->node), last_change_s);
4092 free(last_change_s);
4095 || (history->exit_status == history->expected_exit_status)) {
4113 guint interval_ms = 0;
4117 if (interval_ms == 0) {
4119 block_if_unrecoverable(history);
4134 block_if_unrecoverable(history);
4141 char *last_change_s = last_change_str(history->xml);
4143 crm_info(
"Treating unknown exit status %d from %s of %s " 4144 "on %s at %s as failure",
4145 history->exit_status, task, history->rsc->id,
4146 pcmk__node_name(history->node), last_change_s);
4148 "unknown exit status");
4149 free(last_change_s);
4157 "Remapped %s result from [%s: %s] to [%s: %s] " 4159 history->key, pcmk_exec_status_str(orig_exec_status),
4161 pcmk_exec_status_str(history->execution_status),
4168 should_clear_for_param_change(
const xmlNode *xml_op,
const char *task,
4185 switch (digest_data->
rc) {
4187 crm_trace(
"Resource %s history entry %s on %s" 4188 " has no digest to compare",
4189 rsc->
id, pcmk__xe_history_key(xml_op),
4218 should_ignore_failure_timeout(
const pcmk_resource_t *rsc,
const char *task,
4219 guint interval_ms,
bool is_last_failure)
4243 && (interval_ms != 0)
4249 if (is_last_failure) {
4250 crm_info(
"Waiting to clear monitor failure for remote node %s" 4251 " until fencing has occurred", rsc->
id);
4278 check_operation_expiry(
struct action_history *history)
4280 bool expired =
false;
4281 bool is_last_failure =
pcmk__ends_with(history->id,
"_last_failure_0");
4282 time_t last_run = 0;
4283 int unexpired_fail_count = 0;
4284 const char *clear_reason = NULL;
4288 "Resource history entry %s on %s is not expired: " 4289 "Not Installed does not expire",
4290 history->id, pcmk__node_name(history->node));
4294 if ((history->rsc->failure_timeout > 0)
4303 time_t last_failure = 0;
4306 if ((now >= (last_run + history->rsc->failure_timeout))
4307 && !should_ignore_failure_timeout(history->rsc, history->task,
4308 history->interval_ms,
4320 crm_trace(
"%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" 4321 " last-failure@%lld",
4322 history->id, (
long long) last_run, (expired?
"" :
"not "),
4323 (
long long) now, unexpired_fail_count,
4324 history->rsc->failure_timeout, (
long long) last_failure);
4325 last_failure += history->rsc->failure_timeout + 1;
4326 if (unexpired_fail_count && (now < last_failure)) {
4328 "fail count expiration");
4337 if (unexpired_fail_count == 0) {
4339 clear_reason =
"it expired";
4349 "Resource history entry %s on %s is not " 4350 "expired: Unexpired fail count",
4351 history->id, pcmk__node_name(history->node));
4355 }
else if (is_last_failure
4356 && (history->rsc->remote_reconnect_ms != 0)) {
4360 clear_reason =
"reconnect interval is set";
4364 if (!expired && is_last_failure
4365 && should_clear_for_param_change(history->xml, history->task,
4366 history->rsc, history->node)) {
4367 clear_reason =
"resource parameters have changed";
4370 if (clear_reason != NULL) {
4375 clear_reason, history->rsc->cluster);
4379 && (history->rsc->remote_reconnect_ms != 0)) {
4388 crm_info(
"Clearing %s failure will wait until any scheduled " 4389 "fencing of %s completes",
4390 history->task, history->rsc->id);
4391 order_after_remote_fencing(clear_op, history->rsc,
4392 history->rsc->cluster);
4396 if (expired && (history->interval_ms == 0)
4398 switch (history->exit_status) {
4406 "Resource history entry %s on %s is not " 4407 "expired: Probe result",
4408 history->id, pcmk__node_name(history->node));
4440 update_resource_state(
struct action_history *history,
int exit_status,
4441 const xmlNode *last_failure,
4444 bool clear_past_failure =
false;
4447 || (!pcmk__is_bundled(history->rsc)
4452 clear_past_failure =
true;
4456 if ((last_failure != NULL)
4457 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4459 clear_past_failure =
true;
4462 set_active(history->rsc);
4467 clear_past_failure =
true;
4471 clear_past_failure =
true;
4476 clear_past_failure =
true;
4484 clear_past_failure =
true;
4491 clear_past_failure =
true;
4495 unpack_migrate_to_success(history);
4499 history->rsc->id, pcmk__node_name(history->node));
4500 set_active(history->rsc);
4503 if (!clear_past_failure) {
4513 "%s (%s) is not cleared by a completed %s",
4525 "clear past failures");
4529 if (history->rsc->remote_reconnect_ms == 0) {
4538 "clear past failures and reset remote");
4553 can_affect_state(
struct action_history *history)
4582 unpack_action_result(
struct action_history *history)
4585 &(history->execution_status)) < 0)
4591 history->id, history->rsc->id,
4592 pcmk__node_name(history->node),
4599 &(history->exit_status)) < 0)
4600 || (history->exit_status < 0) || (history->exit_status >
CRM_EX_MAX)) {
4608 history->id, history->rsc->id,
4609 pcmk__node_name(history->node),
4633 process_expired_result(
struct action_history *history,
int orig_exit_status)
4635 if (!pcmk__is_bundled(history->rsc)
4637 && (orig_exit_status != history->expected_exit_status)) {
4642 crm_trace(
"Ignoring resource history entry %s for probe of %s on %s: " 4643 "Masked failure expired",
4644 history->id, history->rsc->id,
4645 pcmk__node_name(history->node));
4649 if (history->exit_status == history->expected_exit_status) {
4653 if (history->interval_ms == 0) {
4654 crm_notice(
"Ignoring resource history entry %s for %s of %s on %s: " 4656 history->id, history->task, history->rsc->id,
4657 pcmk__node_name(history->node));
4661 if (history->node->details->online && !history->node->details->unclean) {
4672 crm_notice(
"Rescheduling %s-interval %s of %s on %s " 4673 "after failure expired",
4675 history->rsc->id, pcmk__node_name(history->node));
4677 "calculated-failure-timeout");
4694 mask_probe_failure(
struct action_history *history,
int orig_exit_status,
4695 const xmlNode *last_failure,
4704 crm_notice(
"Treating probe result '%s' for %s on %s as 'not running'",
4705 services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4706 pcmk__node_name(history->node));
4707 update_resource_state(history, history->expected_exit_status, last_failure,
4711 record_failed_op(history);
4713 "masked-probe-failure", history->rsc->cluster);
4729 failure_is_newer(
const struct action_history *history,
4730 const xmlNode *last_failure)
4732 guint failure_interval_ms = 0U;
4733 long long failure_change = 0LL;
4734 long long this_change = 0LL;
4736 if (last_failure == NULL) {
4740 if (!pcmk__str_eq(history->task,
4747 &failure_interval_ms) !=
pcmk_ok)
4748 || (history->interval_ms != failure_interval_ms)) {
4757 || (failure_change < this_change)) {
4772 process_pending_action(
struct action_history *history,
4773 const xmlNode *last_failure)
4784 if (failure_is_newer(history, last_failure)) {
4790 set_active(history->rsc);
4796 && history->node->details->unclean) {
4800 const char *migrate_target = NULL;
4811 if (history->rsc->pending_task != NULL) {
4825 history->rsc->pending_task = strdup(
"probe");
4826 history->rsc->pending_node = history->node;
4829 history->rsc->pending_task = strdup(history->task);
4830 history->rsc->pending_node = history->node;
4839 bool expired =
false;
4844 struct action_history history = {
4851 CRM_CHECK(rsc && node && xml_op,
return);
4853 history.id = pcmk__xe_id(xml_op);
4854 if (history.id == NULL) {
4856 "without ID", rsc->
id, pcmk__node_name(node));
4862 if (history.task == NULL) {
4865 history.id, rsc->
id, pcmk__node_name(node));
4869 if (!can_affect_state(&history)) {
4871 "Ignoring resource history entry %s for %s on %s " 4872 "with irrelevant action '%s'",
4873 history.id, rsc->
id, pcmk__node_name(node),
4878 if (unpack_action_result(&history) !=
pcmk_rc_ok) {
4883 history.key = pcmk__xe_history_key(xml_op);
4887 history.id, history.task, history.call_id,
4888 pcmk__node_name(node),
4889 pcmk_exec_status_str(history.execution_status),
4894 "%s is running on %s, which is unclean (further action " 4895 "depends on value of stop's on-fail attribute)",
4896 rsc->
id, pcmk__node_name(node));
4899 expired = check_operation_expiry(&history);
4900 old_rc = history.exit_status;
4902 remap_operation(&history, on_fail, expired);
4904 if (expired && (process_expired_result(&history, old_rc) ==
pcmk_rc_ok)) {
4909 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4917 switch (history.execution_status) {
4919 process_pending_action(&history, *last_failure);
4923 update_resource_state(&history, history.exit_status, *last_failure,
4928 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4930 crm_warn(
"Cannot ignore failed %s of %s on %s: " 4931 "Resource agent doesn't exist " 4932 CRM_XS " status=%d rc=%d id=%s",
4933 history.task, rsc->
id, pcmk__node_name(node),
4934 history.execution_status, history.exit_status,
4943 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4944 last_failure, on_fail);
4948 if (pcmk__is_pacemaker_remote_node(node)
4974 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4979 char *last_change_s = last_change_str(xml_op);
4981 crm_warn(
"Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " 4983 history.task, services_ocf_exitcode_str(history.exit_status),
4984 (pcmk__str_empty(history.exit_reason)?
"" :
": "),
4985 pcmk__s(history.exit_reason,
""), rsc->
id,
4986 pcmk__node_name(node), last_change_s, history.id);
4987 free(last_change_s);
4989 update_resource_state(&history, history.expected_exit_status,
4990 *last_failure, on_fail);
4994 record_failed_op(&history);
4998 *on_fail = failure_strategy;
5002 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
5003 last_failure, on_fail);
5006 uint8_t log_level = LOG_ERR;
5009 log_level = LOG_NOTICE;
5012 "Preventing %s from restarting on %s because " 5013 "of hard failure (%s%s%s) " CRM_XS " %s",
5014 parent->id, pcmk__node_name(node),
5015 services_ocf_exitcode_str(history.exit_status),
5016 (pcmk__str_empty(history.exit_reason)?
"" :
": "),
5017 pcmk__s(history.exit_reason,
""), history.id);
5023 "of fatal failure (%s%s%s) " CRM_XS " %s",
5025 services_ocf_exitcode_str(history.exit_status),
5026 (pcmk__str_empty(history.exit_reason)?
"" :
": "),
5027 pcmk__s(history.exit_reason,
""), history.id);
5035 rsc->
id, pcmk__node_name(node), history.id,
5041 add_node_attrs(
const xmlNode *xml_obj,
pcmk_node_t *node,
bool overwrite,
5044 const char *cluster_name = NULL;
5092 }
else if (cluster_name) {
5101 extract_operations(
const char *node,
const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5104 int stop_index = -1;
5105 int start_index = -1;
5107 xmlNode *rsc_op = NULL;
5109 GList *gIter = NULL;
5110 GList *op_list = NULL;
5111 GList *sorted_op_list = NULL;
5115 sorted_op_list = NULL;
5118 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5123 op_list = g_list_prepend(op_list, rsc_op);
5127 if (op_list == NULL) {
5135 if (active_filter == FALSE) {
5136 return sorted_op_list;
5143 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5144 xmlNode *rsc_op = (xmlNode *) gIter->data;
5148 if (start_index < stop_index) {
5149 crm_trace(
"Skipping %s: not active", pcmk__xe_id(rsc_entry));
5152 }
else if (counter < start_index) {
5153 crm_trace(
"Skipping %s: old", pcmk__xe_id(rsc_op));
5156 op_list = g_list_append(op_list, rsc_op);
5159 g_list_free(sorted_op_list);
5167 GList *output = NULL;
5168 GList *intermediate = NULL;
5170 xmlNode *tmp = NULL;
5176 xmlNode *node_state = NULL;
5181 node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5191 if(this_node == NULL) {
5195 }
else if (pcmk__is_pacemaker_remote_node(this_node)) {
5196 determine_remote_online_status(
scheduler, this_node);
5199 determine_online_status(node_state, this_node,
scheduler);
5208 xmlNode *lrm_rsc = NULL;
5216 lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5226 intermediate = extract_operations(
uname, rsc_id, lrm_rsc, active_filter);
5227 output = g_list_concat(output, intermediate);
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
#define CRM_CHECK(expr, failure_action)
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
xmlNode * pcmk__xml_copy(xmlNode *parent, xmlNode *src)
enum pe_quorum_policy no_quorum_policy
bool pe__shutdown_requested(const pcmk_node_t *node)
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Service failed and possibly in promoted role.
#define crm_notice(fmt, args...)
#define PCMK__XE_LRM_RESOURCES
No connection to executor.
pcmk_scheduler_t * cluster
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
#define PCMK_OPT_STONITH_ENABLED
#define PCMK__XE_TICKET_STATE
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Service active and promoted.
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
pcmk_node_t * partial_migration_target
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags)
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
int pcmk__scan_min_int(const char *text, int *result, int minimum)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
#define stop_action(rsc, node, optional)
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
#define PCMK_OPT_CONCURRENT_FENCING
#define PCMK_XE_PRIMITIVE
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
#define pcmk__config_warn(fmt...)
#define pcmk__rsc_trace(rsc, fmt, args...)
pcmk__op_digest_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Match only clones and their instances, by either clone or instance ID.
#define PCMK_XA_RESOURCE_DISCOVERY
int priority_fencing_delay
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id)
#define pcmk__rsc_info(rsc, fmt, args...)
#define PCMK_OPT_SHUTDOWN_LOCK
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
enum rsc_role_e next_role
bool pcmk_is_probe(const char *task, guint interval)
Check whether an action name and interval represent a probe.
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
#define pcmk__config_err(fmt...)
#define PCMK_ACTION_META_DATA
#define PCMK_ACTION_MONITOR
#define PCMK_XA_EXIT_REASON
#define PCMK_XA_NO_QUORUM_PANIC
#define set_config_flag(scheduler, option, flag)
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
#define PCMK_ACTION_MIGRATE_TO
#define SUB_XPATH_LRM_RSC_OP
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
#define PCMK_OPT_CLUSTER_NAME
Necessary CIB secrets are unavailable.
const char * pcmk_on_fail_text(enum action_fail_response on_fail)
Get string equivalent of a failure handling type.
#define PCMK__XA_OP_RESTART_DIGEST
#define CRM_LOG_ASSERT(expr)
Service promoted but more likely to fail soon.
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
enum crm_ais_msg_types type
#define CRMD_JOINSTATE_NACK
#define CRM_ATTR_CLUSTER_NAME
Ensure crm_exit_t can hold this.
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
void pcmk__validate_cluster_options(GHashTable *options)
const char * pcmk__cluster_option(GHashTable *options, const char *name)
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Action did not complete in time.
const char * pcmk_rc_str(int rc)
Get a user-friendly description of a return code.
#define PCMK_NODE_ATTR_MAINTENANCE
#define PCMK_OPT_STOP_ORPHAN_ACTIONS
pcmk_scheduler_t * data_set
pcmk_resource_t * container
gboolean remote_was_fenced
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Execution failed, do not retry on node.
bool pcmk__ends_with(const char *s, const char *match)
#define PCMK_META_REMOTE_CONNECT_TIMEOUT
#define PCMK_OPT_STONITH_ACTION
#define PCMK_XA_OPERATION
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
gboolean remote_requires_reset
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
No fence device is configured for target.
#define PCMK_OPT_ENABLE_STARTUP_PROBES
#define PCMK_META_REMOTE_ALLOW_MIGRATE
#define PCMK_META_IS_MANAGED
#define PCMK__XE_TRANSIENT_ATTRIBUTES
int pcmk__effective_rc(int rc)
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Check whether an action history entry represents a probe.
#define PCMK__META_MIGRATE_TARGET
gboolean remote_maintenance
#define PCMK_META_REMOTE_ADDR
#define pcmk__rsc_debug(rsc, fmt, args...)
#define demote_action(rsc, node, optional)
char int pcmk_parse_interval_spec(const char *input, guint *result_ms)
Parse milliseconds from a Pacemaker interval specification.
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
#define PCMK_OPT_PLACEMENT_STRATEGY
#define PCMK_ACTION_DEMOTE
#define PCMK__XE_LRM_RESOURCE
#define PCMK__XA_TRANSITION_KEY
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
GList * dangling_migrations
#define CRMD_JOINSTATE_DOWN
Maximum value for this enum.
#define crm_warn(fmt, args...)
guint remote_reconnect_ms
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
const char * crm_exit_str(crm_exit_t exit_code)
char * clone_zero(const char *last_rsc_id)
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
#define crm_debug(fmt, args...)
Used only to initialize variables.
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
#define PCMK_OPT_STOP_ALL_RESOURCES
const char * pe_base_name_end(const char *id)
xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v)
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Parameter invalid (in local context)
#define pcmk__sched_err(fmt...)
#define PCMK_XE_UTILIZATION
Parameter invalid (inherently)
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Check whether an action history entry represents a maskable probe.
#define crm_trace(fmt, args...)
#define CRMD_JOINSTATE_MEMBER
#define do_crm_log(level, fmt, args...)
Log a message.
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
bool xml_contains_remote_node(xmlNode *xml)
#define PCMK_VALUE_MEMBER
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
#define PCMK_OPT_MAINTENANCE_MODE
#define PCMK_META_REMOTE_NODE
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear)
const char * stonith_action
struct pe_node_shared_s * details
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
#define PCMK_OPT_SHUTDOWN_LOCK_LIMIT
#define crm_log_xml_debug(xml, text)
#define PCMK_XE_CLUSTER_PROPERTY_SET
#define PCMK_ACTION_START
#define PCMK_VALUE_IGNORE
#define PCMK_OPT_PRIORITY_FENCING_DELAY
void pcmk__str_update(char **str, const char *value)
Wrappers for and extensions to libxml2.
#define PCMK_OPT_STARTUP_FENCING
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
#define PCMK_META_TARGET_ROLE
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Action completed, result is known.
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
GHashTable * pe__node_list2table(const GList *list)
#define PCMK_NODE_ATTR_TERMINATE
#define PCMK__ACTION_POWEROFF
#define pcmk__set_rsc_flags(resource, flags_to_set)
Execution failed, do not retry anywhere.
#define PCMK_NODE_ATTR_STANDBY
void pe__free_digests(gpointer ptr)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
#define PCMK_OPT_NODE_PENDING_TIMEOUT
Dependencies not available locally.
#define PCMK_OPT_START_FAILURE_IS_FATAL
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
enum pe_obj_types variant
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
#define pcmk__str_copy(str)
#define pcmk__warn_once(wo_flag, fmt...)
const char * placement_strategy
gboolean order_actions(pcmk_action_t *first, pcmk_action_t *then, uint32_t flags)
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
#define PCMK_VALUE_FENCE_LEGACY
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
#define PCMK_XE_META_ATTRIBUTES
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
#define pcmk__assert(expr)
gboolean rsc_discovery_enabled
#define PCMK_VALUE_ONLINE
Requested action not implemented.
#define PCMK_OPT_STONITH_TIMEOUT
int crm_str_to_boolean(const char *s, int *ret)
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
int pe__target_rc_from_xml(const xmlNode *xml_op)
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Service active but more likely to fail soon.
#define PCMK_META_INTERVAL
#define PCMK_XA_LAST_RC_CHANGE
Agent does not implement requested action.
#define PCMK_XE_FENCING_LEVEL
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
pcmk__action_result_t result
#define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
guint node_pending_timeout
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
#define PCMK_OPT_SYMMETRIC_CLUSTER
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
pcmk_scheduler_t * scheduler
#define PCMK__XE_LRM_RSC_OP
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
#define PCMK_META_REMOTE_PORT
#define PCMK_OPT_STOP_ORPHAN_RESOURCES
int pcmk__xe_get_score(const xmlNode *xml, const char *name, int *score, int default_score)
#define PCMK_ACTION_MIGRATE_FROM
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
#define PCMK__XA_OP_STATUS
#define pcmk__sched_warn(fmt...)
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, int score, pcmk_scheduler_t *scheduler)
#define PCMK_META_ON_FAIL
#define crm_log_xml_info(xml, text)
#define PCMK_ACTION_PROMOTE
#define PCMK_OPT_NO_QUORUM_POLICY
#define PCMK_OPT_HAVE_WATCHDOG
#define CRMD_JOINSTATE_PENDING
#define PCMK__XE_NODE_STATE
#define PCMK_XA_LAST_GRANTED
CRM_TRACE_INIT_DATA(pe_status)
Agent or dependency not available locally.
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
GHashTable * digest_cache
#define pcmk__set_action_flags(action, flags_to_set)
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
#define PCMK__OPT_REMOVE_AFTER_STOP
void destroy_ticket(gpointer data)
void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
const char * pcmk__readable_interval(guint interval_ms)
pcmk_node_t * pending_node
#define SUB_XPATH_LRM_RESOURCE
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
gboolean crm_is_true(const char *s)
#define PCMK__META_CONTAINER
#define CRM_ATTR_SITE_NAME
Resource role is unknown.
#define PCMK__META_MIGRATE_SOURCE
#define PCMK_VALUE_FREEZE
Action cannot be attempted (e.g. shutdown)
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
xmlNode * pcmk__xe_create(xmlNode *parent, const char *name)
#define pcmk__assert_alloc(nmemb, size)
time_t get_effective_time(pcmk_scheduler_t *scheduler)
void freeXpathObject(xmlXPathObjectPtr xpathObj)
#define PCMK_VALUE_OFFLINE
#define PCMK_XE_INSTANCE_ATTRIBUTES
xmlNode * pcmk__xe_next_same(const xmlNode *node)
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
#define XPATH_ENABLE_UNFENCING
#define PCMK_VALUE_REMOTE
Execution failed, may be retried.
#define crm_info(fmt, args...)
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
#define pcmk__set_scheduler_flags(scheduler, flags_to_set)
GHashTable * template_rsc_sets
#define PCMK_VALUE_DEMOTE
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
#define PCMK__XA_NODE_FENCED
char * clone_strip(const char *last_rsc_id)
#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
enum pcmk__digest_result rc
#define PCMK__XA_NODE_IN_MAINTENANCE
pcmk_resource_t * remote_rsc
pcmk_node_t * partial_migration_source
#define PCMK_ACTION_NOTIFY
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
GHashTable * allowed_nodes
Where resource is running.