30 struct action_history {
39 const char *exit_reason;
42 int expected_exit_status;
51 #define set_config_flag(scheduler, option, flag) do { \ 52 GHashTable *config_hash = (scheduler)->config_hash; \ 53 const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ 55 if (scf_value != NULL) { \ 56 if (crm_is_true(scf_value)) { \ 57 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ 58 LOG_TRACE, "Scheduler", \ 59 crm_system_name, (scheduler)->flags, \ 62 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ 63 LOG_TRACE, "Scheduler", \ 64 crm_system_name, (scheduler)->flags, \ 71 xmlNode *xml_op, xmlNode **last_failure,
75 static void add_node_attrs(
const xmlNode *xml_obj,
pcmk_node_t *node,
77 static void determine_online_status(
const xmlNode *node_state,
81 static void unpack_node_lrm(
pcmk_node_t *node,
const xmlNode *xml,
91 if (pcmk__is_pacemaker_remote_node(node)
113 const char *reason,
bool priority_delay)
118 if (pcmk__is_guest_or_bundle_node(node)) {
124 "(otherwise would because %s): " 125 "its guest resource %s is unmanaged",
126 pcmk__node_name(node), reason, rsc->
id);
129 "(by recovering its guest resource %s): %s",
130 pcmk__node_name(node), rsc->
id, reason);
142 }
else if (is_dangling_guest_node(node)) {
143 crm_info(
"Cleaning up dangling connection for guest node %s: " 144 "fencing was already done because %s, " 145 "and guest resource no longer exists",
146 pcmk__node_name(node), reason);
150 }
else if (pcmk__is_remote_node(node)) {
155 "(otherwise would because %s): connection is unmanaged",
156 pcmk__node_name(node), reason);
160 pcmk__node_name(node),
169 crm_trace(
"Cluster node %s %s because %s",
170 pcmk__node_name(node),
176 pcmk__node_name(node),
187 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \ 188 "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \ 189 "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \ 190 "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']" 193 #define XPATH_ENABLE_UNFENCING \ 194 "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ 195 "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \ 196 "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \ 197 "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR 202 xmlXPathObjectPtr
result = NULL;
216 const char *value = NULL;
217 guint interval_ms = 0U;
239 crm_info(
"Startup probes: disabled (dangerous)");
244 crm_info(
"Watchdog-based self-fencing will be performed via SBD if " 259 if (interval_ms >= INT_MAX) {
269 crm_debug(
"STONITH of failed nodes is enabled");
271 crm_debug(
"STONITH of failed nodes is disabled");
280 "removed in a future release " 289 crm_debug(
"Concurrent fencing is enabled");
291 crm_debug(
"Concurrent fencing is disabled");
298 crm_trace(
"Priority fencing delay is %ds",
304 crm_debug(
"Stop all active resources: %s",
310 crm_debug(
"Cluster is symmetric" " - resources can run anywhere by default");
334 " to 'stop': cluster has never had quorum");
339 " to 'stop' because fencing is disabled");
349 crm_debug(
"On loss of quorum: Freeze resources");
352 crm_debug(
"On loss of quorum: Stop ALL resources");
356 "Demote promotable resources and stop other resources");
359 crm_notice(
"On loss of quorum: Fence all remaining nodes");
369 crm_trace(
"Orphan resources are stopped");
371 crm_trace(
"Orphan resources are ignored");
377 crm_trace(
"Orphan resource actions are stopped");
379 crm_trace(
"Orphan resource actions are ignored");
388 " cluster property is deprecated and will be " 389 "removed in a future release");
404 crm_trace(
"Start failures are always fatal");
406 crm_trace(
"Start failures are handled by failcount");
414 crm_trace(
"Unseen nodes will be fenced");
417 "Blind faith: not fencing unseen nodes");
432 crm_trace(
"Resources will be locked to nodes that were cleanly " 433 "shut down (locks expire after %s)",
436 crm_trace(
"Resources will not be locked to nodes that were cleanly " 446 crm_trace(
"Fence pending nodes after %s",
465 if (new_node == NULL) {
473 if (new_node->
details == NULL) {
507 "(such as %s) is deprecated and will be removed in a " 509 pcmk__s(
uname,
"unnamed node"));
515 if (pcmk__is_pacemaker_remote_node(new_node)) {
533 xmlNode *attr_set = NULL;
534 xmlNode *attr = NULL;
536 const char *container_id = pcmk__xe_id(xml_obj);
537 const char *remote_name = NULL;
538 const char *remote_server = NULL;
539 const char *remote_port = NULL;
540 const char *connect_timeout =
"60s";
541 const char *remote_allow_migrate=NULL;
542 const char *is_managed = NULL;
545 attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
552 attr != NULL; attr = pcmk__xe_next(attr)) {
565 remote_server = value;
571 connect_timeout = value;
574 remote_allow_migrate = value;
582 if (remote_name == NULL) {
591 remote_allow_migrate, is_managed,
592 connect_timeout, remote_server, remote_port);
625 xmlNode *xml_obj = NULL;
627 const char *
id = NULL;
628 const char *
uname = NULL;
629 const char *
type = NULL;
630 const char *score = NULL;
633 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
646 "> entry in configuration without id");
651 if (new_node == NULL) {
655 handle_startup_fencing(
scheduler, new_node);
657 add_node_attrs(xml_obj, new_node, FALSE,
scheduler);
666 crm_info(
"Creating a fake local node");
677 const char *container_id = NULL;
694 rsc->
id, container_id);
697 rsc->
id, container_id);
705 xmlNode *xml_obj = NULL;
711 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
713 const char *new_node_id = NULL;
719 new_node_id = pcmk__xe_id(xml_obj);
725 crm_trace(
"Found remote node %s defined by resource %s",
726 new_node_id, pcmk__xe_id(xml_obj));
741 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
745 crm_trace(
"Found guest node %s in resource %s",
746 new_node_id, pcmk__xe_id(xml_obj));
757 xmlNode *xml_obj2 = NULL;
759 xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
761 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
766 crm_trace(
"Found guest node %s in resource %s inside group %s",
767 new_node_id, pcmk__xe_id(xml_obj2),
768 pcmk__xe_id(xml_obj));
803 pcmk__rsc_trace(new_rsc,
"Linking remote connection resource %s to %s",
804 new_rsc->
id, pcmk__node_name(remote_node));
811 handle_startup_fencing(
scheduler, remote_node);
823 destroy_tag(gpointer
data)
829 g_list_free_full(tag->
refs, free);
849 xmlNode *xml_obj = NULL;
855 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
858 const char *
id = pcmk__xe_id(xml_obj);
860 if (pcmk__str_empty(
id)) {
868 NULL, NULL) == FALSE) {
883 "because configuration is invalid",
903 pcmk__config_err(
"Resource start-up disabled since no STONITH resources have been defined");
906 pcmk__config_err(
"NOTE: Clusters with shared data need STONITH to ensure data integrity");
924 xmlNode *xml_obj = NULL;
933 if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
951 xmlNode *xml_tag = NULL;
956 xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
958 xmlNode *xml_obj_ref = NULL;
959 const char *tag_id = pcmk__xe_id(xml_tag);
965 if (tag_id == NULL) {
967 (
const char *) xml_tag->name);
972 xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
974 const char *obj_ref = pcmk__xe_id(xml_obj_ref);
980 if (obj_ref == NULL) {
982 xml_obj_ref->name, tag_id);
1000 const char *ticket_id = NULL;
1001 const char *granted = NULL;
1002 const char *last_granted = NULL;
1003 const char *standby = NULL;
1004 xmlAttrPtr xIter = NULL;
1008 ticket_id = pcmk__xe_id(xml_ticket);
1009 if (pcmk__str_empty(ticket_id)) {
1013 crm_trace(
"Processing ticket state for %s", ticket_id);
1016 if (ticket == NULL) {
1018 if (ticket == NULL) {
1023 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1024 const char *prop_name = (
const char *)xIter->name;
1025 const char *prop_value = pcmk__xml_attr_value(xIter);
1039 crm_info(
"We do not have ticket '%s'", ticket->
id);
1044 long long last_granted_ll;
1054 crm_info(
"Granted ticket '%s' is in standby-mode", ticket->
id);
1060 crm_trace(
"Done with ticket state for %s", ticket_id);
1068 xmlNode *xml_obj = NULL;
1071 xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1076 unpack_ticket_state(xml_obj,
scheduler);
1083 unpack_handle_remote_attrs(
pcmk_node_t *this_node,
const xmlNode *state,
1086 const char *discovery = NULL;
1087 const xmlNode *attrs = NULL;
1094 if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1097 crm_trace(
"Processing Pacemaker Remote node %s",
1098 pcmk__node_name(this_node));
1110 add_node_attrs(attrs, this_node, TRUE,
scheduler);
1113 crm_info(
"%s is shutting down", pcmk__node_name(this_node));
1119 crm_info(
"%s is in standby mode", pcmk__node_name(this_node));
1126 crm_info(
"%s is in maintenance mode", pcmk__node_name(this_node));
1133 if ((discovery != NULL) && !
crm_is_true(discovery)) {
1137 " node attribute is deprecated and will be removed" 1138 " (and behave as 'true') in a future release.");
1140 if (pcmk__is_remote_node(this_node)
1144 " attribute on Pacemaker Remote node %s" 1145 " because fencing is disabled",
1146 pcmk__node_name(this_node));
1153 crm_info(
"%s has resource discovery disabled",
1154 pcmk__node_name(this_node));
1169 unpack_transient_attributes(
const xmlNode *state,
pcmk_node_t *node,
1172 const char *discovery = NULL;
1177 add_node_attrs(attrs, node, TRUE,
scheduler);
1181 crm_info(
"%s is in standby mode", pcmk__node_name(node));
1187 crm_info(
"%s is in maintenance mode", pcmk__node_name(node));
1194 if ((discovery != NULL) && !
crm_is_true(discovery)) {
1197 " attribute for %s because disabling resource" 1198 " discovery is not allowed for cluster nodes",
1199 pcmk__node_name(node));
1218 const char *
id = NULL;
1219 const char *
uname = NULL;
1231 if (
uname == NULL) {
1244 if (this_node == NULL) {
1245 crm_notice(
"Ignoring recorded state for removed node with name %s and " 1250 if (pcmk__is_pacemaker_remote_node(this_node)) {
1261 unpack_transient_attributes(state, this_node,
scheduler);
1269 crm_trace(
"Determining online status of cluster node %s (id %s)",
1270 pcmk__node_name(this_node),
id);
1271 determine_online_status(state, this_node,
scheduler);
1303 unpack_node_history(
const xmlNode *status,
bool fence,
1314 const char *
id = pcmk__xe_id(state);
1318 if ((
id == NULL) || (
uname == NULL)) {
1320 crm_trace(
"Not unpacking resource history from malformed " 1326 if (this_node == NULL) {
1328 crm_trace(
"Not unpacking resource history for node %s because " 1329 "no longer in configuration",
id);
1334 crm_trace(
"Not unpacking resource history for node %s because " 1335 "already unpacked",
id);
1342 }
else if (pcmk__is_guest_or_bundle_node(this_node)) {
1351 crm_trace(
"Not unpacking resource history for guest node %s " 1352 "because container and connection are not known to " 1357 }
else if (pcmk__is_remote_node(this_node)) {
1368 crm_trace(
"Not unpacking resource history for remote node %s " 1369 "because connection is not known to be up",
id);
1382 crm_trace(
"Not unpacking resource history for offline " 1383 "cluster node %s",
id);
1387 if (pcmk__is_pacemaker_remote_node(this_node)) {
1388 determine_remote_online_status(
scheduler, this_node);
1389 unpack_handle_remote_attrs(this_node, state,
scheduler);
1392 crm_trace(
"Unpacking resource history for %snode %s",
1393 (fence?
"unseen " :
""),
id);
1396 unpack_node_lrm(this_node, state,
scheduler);
1409 xmlNode *state = NULL;
1418 state = pcmk__xe_next(state)) {
1421 unpack_tickets_state((xmlNode *) state,
scheduler);
1428 while (unpack_node_history(status, FALSE,
scheduler) == EAGAIN) {
1429 crm_trace(
"Another pass through node resource histories is needed");
1433 unpack_node_history(status,
1444 pcmk_node_t *node = pcmk__current_node(container);
1458 for (GList *gIter =
scheduler->
nodes; gIter != NULL; gIter = gIter->next) {
1461 if (!pcmk__is_pacemaker_remote_node(this_node)) {
1470 determine_remote_online_status(
scheduler, this_node);
1494 if (member_time == NULL) {
1510 long long when_member = 0LL;
1513 0LL) !=
pcmk_rc_ok) || (when_member < 0LL)) {
1532 unpack_node_online(
const xmlNode *node_state)
1545 long long when_online = 0LL;
1548 || (when_online < 0)) {
1567 unpack_node_terminate(
const pcmk_node_t *node,
const xmlNode *node_state)
1569 long long value = 0LL;
1576 return (value_i != 0);
1582 "node attribute for %s", value_s, pcmk__node_name(node));
1588 const xmlNode *node_state,
1591 gboolean online = FALSE;
1594 long long when_member = unpack_node_member(node_state,
scheduler);
1595 long long when_online = unpack_node_online(node_state);
1597 if (when_member <= 0) {
1598 crm_trace(
"Node %s is %sdown", pcmk__node_name(this_node),
1599 ((when_member < 0)?
"presumed " :
""));
1601 }
else if (when_online > 0) {
1605 crm_debug(
"Node %s is not ready to run resources: %s",
1606 pcmk__node_name(this_node), join);
1610 crm_trace(
"Node %s controller is down: " 1611 "member@%lld online@%lld join=%s expected=%s",
1612 pcmk__node_name(this_node), when_member, when_online,
1613 pcmk__s(join,
"<null>"), pcmk__s(exp_state,
"<null>"));
1618 crm_info(
"Node %s member@%lld online@%lld join=%s expected=%s",
1619 pcmk__node_name(this_node), when_member, when_online,
1620 pcmk__s(join,
"<null>"), pcmk__s(exp_state,
"<null>"));
1640 long long when_member,
long long when_online)
1643 && (when_member > 0) && (when_online <= 0)) {
1660 const xmlNode *node_state,
1663 bool termination_requested = unpack_node_terminate(this_node, node_state);
1666 long long when_member = unpack_node_member(node_state,
scheduler);
1667 long long when_online = unpack_node_online(node_state);
1687 crm_trace(
"Node %s member@%lld online@%lld join=%s expected=%s%s",
1688 pcmk__node_name(this_node), when_member, when_online,
1689 pcmk__s(join,
"<null>"), pcmk__s(exp_state,
"<null>"),
1690 (termination_requested?
" (termination requested)" :
""));
1693 crm_debug(
"%s is shutting down", pcmk__node_name(this_node));
1696 return (when_online > 0);
1699 if (when_member < 0) {
1701 "peer has not been seen by the cluster", FALSE);
1707 "peer failed Pacemaker membership criteria", FALSE);
1709 }
else if (termination_requested) {
1710 if ((when_member <= 0) && (when_online <= 0)
1712 crm_info(
"%s was fenced as requested", pcmk__node_name(this_node));
1720 if (pending_too_long(
scheduler, this_node, when_member, when_online)) {
1722 "peer pending timed out on joining the process group",
1725 }
else if ((when_member > 0) || (when_online > 0)) {
1726 crm_info(
"- %s is not ready to run resources",
1727 pcmk__node_name(this_node));
1732 crm_trace(
"%s is down or still coming up",
1733 pcmk__node_name(this_node));
1736 }
else if (when_member <= 0) {
1739 "peer is no longer part of the cluster", TRUE);
1741 }
else if (when_online <= 0) {
1743 "peer process is no longer available", FALSE);
1748 crm_info(
"%s is active", pcmk__node_name(this_node));
1752 crm_info(
"%s is not ready to run resources",
1753 pcmk__node_name(this_node));
1762 return (when_member > 0);
1779 goto remote_online_done;
1784 if (container && pcmk__list_of_1(rsc->
running_on)) {
1790 crm_trace(
"%s node %s presumed ONLINE because connection resource is started",
1791 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1799 crm_trace(
"%s node %s shutting down because connection resource is stopping",
1800 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1806 crm_trace(
"Guest node %s UNCLEAN because guest resource failed",
1812 crm_trace(
"%s node %s OFFLINE because connection resource failed",
1813 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1817 || ((container != NULL)
1820 crm_trace(
"%s node %s OFFLINE because its resource is stopped",
1821 (container?
"Guest" :
"Remote"), this_node->
details->
id);
1825 }
else if (
host && (
host->details->online == FALSE)
1826 &&
host->details->unclean) {
1827 crm_trace(
"Guest node %s UNCLEAN because host is unclean",
1839 determine_online_status(
const xmlNode *node_state,
pcmk_node_t *this_node,
1842 gboolean online = FALSE;
1865 online = determine_online_status_no_fencing(
scheduler, node_state,
1869 online = determine_online_status_fencing(
scheduler, node_state,
1878 this_node->
fixed = TRUE;
1884 this_node->
fixed = TRUE;
1889 crm_info(
"%s is not a Pacemaker node", pcmk__node_name(this_node));
1895 crm_info(
"%s is %s", pcmk__node_name(this_node),
1902 crm_trace(
"%s is offline", pcmk__node_name(this_node));
1917 if (!pcmk__str_empty(
id)) {
1918 const char *end =
id + strlen(
id) - 1;
1920 for (
const char *s = end; s >
id; --s) {
1934 return (s == end)? s : (s - 1);
1958 char *basename = NULL;
1961 basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1980 size_t base_name_len = end - last_rsc_id + 1;
1985 memcpy(zero, last_rsc_id, base_name_len);
1986 zero[base_name_len] =
':';
1987 zero[base_name_len + 1] =
'0';
1992 create_fake_resource(
const char *rsc_id,
const xmlNode *rsc_entry,
2009 crm_debug(
"Detected orphaned remote node %s", rsc_id);
2018 crm_trace(
"Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2025 crm_trace(
"Detected orphaned container filler %s", rsc_id);
2055 top->
id,
parent->id, rsc_id, pcmk__node_name(node));
2078 GList *rIter = NULL;
2081 gboolean skip_inactive = FALSE;
2087 rsc_id, pcmk__node_name(node),
parent->id);
2088 for (rIter =
parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2089 GList *locations = NULL;
2115 if (pcmk__same_node((
pcmk_node_t *) locations->data, node)) {
2123 rsc =
parent->fns->find_rsc(child, rsc_id, NULL,
2134 crm_notice(
"Active (now-)anonymous clone %s has " 2135 "multiple (orphan) instance histories on %s",
2136 parent->id, pcmk__node_name(node));
2137 skip_inactive = TRUE;
2144 g_list_free(locations);
2148 if (!skip_inactive && !inactive_instance
2151 inactive_instance =
parent->fns->find_rsc(child, rsc_id, NULL,
2157 if ((inactive_instance != NULL) &&
2159 !pcmk__same_node(inactive_instance->
pending_node, node)) {
2160 inactive_instance = NULL;
2166 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2168 inactive_instance->
id);
2169 rsc = inactive_instance;
2187 && !pcmk__is_guest_or_bundle_node(node)
2224 crm_trace(
"%s is not known as %s either (orphan)",
2230 crm_trace(
"Resource history for %s is orphaned because it is no longer primitive",
2238 if (pcmk__is_anonymous_clone(
parent)) {
2240 if (pcmk__is_bundled(
parent)) {
2256 rsc_id, pcmk__node_name(node), rsc->
id,
2263 process_orphan_resource(
const xmlNode *rsc_entry,
const pcmk_node_t *node,
2269 crm_debug(
"Detected orphan resource %s on %s",
2270 rsc_id, pcmk__node_name(node));
2271 rsc = create_fake_resource(rsc_id, rsc_entry,
scheduler);
2293 char *reason = NULL;
2311 ((rsc->
clone_name == NULL)?
"" :
" also known as "),
2313 pcmk__node_name(n));
2329 gboolean should_fence = FALSE;
2338 if (pcmk__is_guest_or_bundle_node(node)) {
2340 should_fence = TRUE;
2344 if (pcmk__is_remote_node(node)
2357 " revoked if remote connection can " 2358 "be re-established elsewhere)",
2361 should_fence = TRUE;
2365 if (reason == NULL) {
2374 save_on_fail = on_fail;
2420 "__action_migration_auto__", rsc->
cluster);
2439 if ((rsc->
container != NULL) && pcmk__is_bundled(rsc)) {
2462 if (pcmk__is_remote_node(tmpnode)
2468 "remote connection is unrecoverable", FALSE);
2500 crm_notice(
"Removed resource %s is active on %s and will be " 2501 "stopped when possible",
2502 rsc->
id, pcmk__node_name(node));
2504 crm_notice(
"Removed resource %s must be stopped manually on %s " 2506 " is set to false", rsc->
id, pcmk__node_name(node));
2537 GList *gIter = possible_matches;
2539 for (; gIter != NULL; gIter = gIter->next) {
2545 g_list_free(possible_matches);
2564 int start_index,
int stop_index,
2568 const char *task = NULL;
2569 const char *status = NULL;
2570 GList *gIter = sorted_op_list;
2574 rsc->
id, start_index, stop_index);
2576 for (; gIter != NULL; gIter = gIter->next) {
2577 xmlNode *rsc_op = (xmlNode *) gIter->data;
2579 guint interval_ms = 0;
2581 const char *
id = pcmk__xe_id(rsc_op);
2587 rsc->
id, pcmk__node_name(node));
2591 }
else if (start_index < stop_index && counter <= stop_index) {
2593 id, pcmk__node_name(node));
2596 }
else if (counter < start_index) {
2598 id, pcmk__node_name(node), counter);
2603 if (interval_ms == 0) {
2605 id, pcmk__node_name(node));
2612 id, pcmk__node_name(node));
2618 pcmk__rsc_trace(rsc,
"Creating %s on %s", key, pcmk__node_name(node));
2628 int implied_monitor_start = -1;
2629 int implied_clone_start = -1;
2630 const char *task = NULL;
2631 const char *status = NULL;
2636 for (
const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2637 const xmlNode *rsc_op = (
const xmlNode *) iter->data;
2646 *stop_index = counter;
2650 *start_index = counter;
2652 }
else if ((implied_monitor_start <= *stop_index)
2658 implied_monitor_start = counter;
2662 implied_clone_start = counter;
2666 if (*start_index == -1) {
2667 if (implied_clone_start != -1) {
2668 *start_index = implied_clone_start;
2669 }
else if (implied_monitor_start != -1) {
2670 *start_index = implied_monitor_start;
2680 time_t lock_time = 0;
2683 &lock_time) ==
pcmk_ok) && (lock_time != 0)) {
2689 rsc->
id, pcmk__node_name(node));
2713 unpack_lrm_resource(
pcmk_node_t *node,
const xmlNode *lrm_resource,
2716 GList *gIter = NULL;
2717 int stop_index = -1;
2718 int start_index = -1;
2721 const char *rsc_id = pcmk__xe_id(lrm_resource);
2724 GList *op_list = NULL;
2725 GList *sorted_op_list = NULL;
2727 xmlNode *rsc_op = NULL;
2728 xmlNode *last_failure = NULL;
2733 if (rsc_id == NULL) {
2740 rsc_id, pcmk__node_name(node));
2749 op_list = g_list_prepend(op_list, rsc_op);
2753 if (op_list == NULL) {
2760 rsc = unpack_find_resource(
scheduler, node, rsc_id);
2762 if (op_list == NULL) {
2766 rsc = process_orphan_resource(lrm_resource, node,
scheduler);
2773 unpack_shutdown_lock(lrm_resource, rsc, node,
scheduler);
2777 saved_role = rsc->
role;
2781 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2782 xmlNode *rsc_op = (xmlNode *) gIter->data;
2784 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2789 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2793 g_list_free(sorted_op_list);
2795 process_rsc_state(rsc, node, on_fail);
2799 || (req_role < rsc->next_role)) {
2805 "%s: Not overwriting calculated next role %s" 2806 " with requested next role %s",
2812 if (saved_role > rsc->
role) {
2813 rsc->
role = saved_role;
2820 handle_orphaned_container_fillers(
const xmlNode *lrm_rsc_list,
2825 rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2830 const char *container_id;
2838 if (container_id == NULL || rsc_id == NULL) {
2843 if (container == NULL) {
2848 if ((rsc == NULL) || (rsc->
container != NULL)
2853 pcmk__rsc_trace(rsc,
"Mapped container of orphaned resource %s to %s",
2854 rsc->
id, container_id);
2869 unpack_node_lrm(
pcmk_node_t *node,
const xmlNode *xml,
2872 bool found_orphaned_container_filler =
false;
2894 found_orphaned_container_filler =
true;
2901 if (found_orphaned_container_filler) {
2902 handle_orphaned_container_fillers(xml,
scheduler);
2919 set_node_score(gpointer key, gpointer value, gpointer user_data)
2922 int *score = user_data;
2927 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ 2928 "/" PCMK__XE_NODE_STATE 2929 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \ 2930 "/" PCMK__XE_LRM_RESOURCES \ 2931 "/" PCMK__XE_LRM_RESOURCE 2932 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP 2935 find_lrm_op(
const char *resource,
const char *op,
const char *node,
const char *source,
2938 GString *xpath = NULL;
2939 xmlNode *xml = NULL;
2941 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2944 xpath = g_string_sized_new(256);
2957 }
else if ((source != NULL)
2963 g_string_append_c(xpath,
']');
2968 g_string_free(xpath, TRUE);
2970 if (xml && target_rc >= 0) {
2984 find_lrm_resource(
const char *rsc_id,
const char *node_name,
2987 GString *xpath = NULL;
2988 xmlNode *xml = NULL;
2990 CRM_CHECK((rsc_id != NULL) && (node_name != NULL),
return NULL);
2992 xpath = g_string_sized_new(256);
3001 g_string_free(xpath, TRUE);
3018 xmlXPathObjectPtr search;
3028 result = (numXpathResults(search) == 0);
3047 monitor_not_running_after(
const char *rsc_id,
const char *node_name,
3048 const xmlNode *xml_op,
bool same_node,
3073 non_monitor_after(
const char *rsc_id,
const char *node_name,
3074 const xmlNode *xml_op,
bool same_node,
3077 xmlNode *lrm_resource = NULL;
3079 lrm_resource = find_lrm_resource(rsc_id, node_name,
scheduler);
3080 if (lrm_resource == NULL) {
3088 const char * task = NULL;
3120 newer_state_after_migrate(
const char *rsc_id,
const char *node_name,
3121 const xmlNode *migrate_to,
3122 const xmlNode *migrate_from,
3125 const xmlNode *xml_op = migrate_to;
3126 const char *source = NULL;
3127 const char *
target = NULL;
3128 bool same_node =
false;
3131 xml_op = migrate_from;
3142 xml_op = migrate_from;
3146 xml_op = migrate_to;
3151 xml_op = migrate_to;
3155 xml_op = migrate_from;
3163 return non_monitor_after(rsc_id, node_name, xml_op, same_node,
scheduler)
3164 || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3181 get_migration_node_names(
const xmlNode *entry,
const pcmk_node_t *source_node,
3183 const char **source_name,
const char **target_name)
3187 if ((*source_name == NULL) || (*target_name == NULL)) {
3194 if ((source_node != NULL)
3195 && !pcmk__str_eq(*source_name, source_node->
details->
uname,
3199 pcmk__xe_id(entry), *source_name,
3200 pcmk__node_name(source_node));
3204 if ((target_node != NULL)
3205 && !pcmk__str_eq(*target_name, target_node->
details->
uname,
3209 pcmk__xe_id(entry), *target_name,
3210 pcmk__node_name(target_node));
3233 rsc->
id, pcmk__node_name(node));
3246 unpack_migrate_to_success(
struct action_history *history)
3282 xmlNode *migrate_from = NULL;
3283 const char *source = NULL;
3284 const char *
target = NULL;
3285 bool source_newer_op =
false;
3286 bool target_newer_state =
false;
3287 bool active_on_target =
false;
3290 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3296 source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3297 true, history->rsc->cluster);
3301 target, source, -1, history->rsc->cluster);
3302 if (migrate_from != NULL) {
3303 if (source_newer_op) {
3317 target_newer_state = newer_state_after_migrate(history->rsc->id,
target,
3318 history->xml, migrate_from,
3319 history->rsc->cluster);
3320 if (source_newer_op && target_newer_state) {
3329 add_dangling_migration(history->rsc, history->node);
3339 active_on_target = !target_newer_state && (target_node != NULL)
3343 if (active_on_target) {
3362 && unknown_on_node(history->rsc,
target)) {
3366 if (active_on_target) {
3372 if ((source_node != NULL) && source_node->
details->
online) {
3379 history->rsc->partial_migration_target = target_node;
3380 history->rsc->partial_migration_source = source_node;
3383 }
else if (!source_newer_op) {
3398 unpack_migrate_to_failure(
struct action_history *history)
3400 xmlNode *target_migrate_from = NULL;
3401 const char *source = NULL;
3402 const char *
target = NULL;
3405 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3416 target_migrate_from = find_lrm_op(history->rsc->id,
3425 !unknown_on_node(history->rsc,
target)
3429 && !newer_state_after_migrate(history->rsc->id,
target, history->xml,
3430 target_migrate_from,
3431 history->rsc->cluster)) {
3444 }
else if (!non_monitor_after(history->rsc->id, source, history->xml,
true,
3445 history->rsc->cluster)) {
3452 history->rsc->dangling_migrations =
3453 g_list_prepend(history->rsc->dangling_migrations,
3454 (gpointer) history->node);
3465 unpack_migrate_from_failure(
struct action_history *history)
3467 xmlNode *source_migrate_to = NULL;
3468 const char *source = NULL;
3469 const char *
target = NULL;
3472 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3485 history->rsc->cluster);
3492 !unknown_on_node(history->rsc, source)
3496 && !newer_state_after_migrate(history->rsc->id, source,
3497 source_migrate_to, history->xml,
3498 history->rsc->cluster)) {
3519 record_failed_op(
struct action_history *history)
3521 if (!(history->node->details->online)) {
3525 for (
const xmlNode *xIter = history->rsc->cluster->failed->children;
3526 xIter != NULL; xIter = xIter->next) {
3528 const char *key = pcmk__xe_history_key(xIter);
3532 && pcmk__str_eq(
uname, history->node->details->uname,
3534 crm_trace(
"Skipping duplicate entry %s on %s",
3535 history->key, pcmk__node_name(history->node));
3540 crm_trace(
"Adding entry for %s on %s to failed action list",
3541 history->key, pcmk__node_name(history->node));
3548 last_change_str(
const xmlNode *xml_op)
3556 const char *p = strchr(when_s,
' ');
3559 if ((p != NULL) && (*(++p) !=
'\0')) {
3659 return first - second;
3674 if (fail_rsc->
parent != NULL) {
3677 if (pcmk__is_anonymous_clone(
parent)) {
3687 crm_notice(
"%s will not be started under current conditions", fail_rsc->
id);
3692 g_hash_table_foreach(fail_rsc->
allowed_nodes, set_node_score, &score);
3704 unpack_failure_handling(
struct action_history *history,
3709 history->interval_ms,
true);
3713 history->interval_ms, config);
3718 history->interval_ms, on_fail_str);
3721 g_hash_table_destroy(meta);
3735 unpack_rsc_op_failure(
struct action_history *history,
3737 enum rsc_role_e fail_role, xmlNode **last_failure,
3740 bool is_probe =
false;
3741 char *last_change_s = NULL;
3743 *last_failure = history->xml;
3746 last_change_s = last_change_str(history->xml);
3750 crm_trace(
"Unexpected result (%s%s%s) was recorded for " 3751 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3752 services_ocf_exitcode_str(history->exit_status),
3753 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
3754 pcmk__s(history->exit_reason,
""),
3755 (is_probe?
"probe" : history->task), history->rsc->id,
3756 pcmk__node_name(history->node), last_change_s,
3757 history->exit_status, history->id);
3760 "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3761 services_ocf_exitcode_str(history->exit_status),
3762 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
3763 pcmk__s(history->exit_reason,
""),
3764 (is_probe?
"probe" : history->task), history->rsc->id,
3765 pcmk__node_name(history->node), last_change_s,
3766 history->exit_status, history->id);
3768 if (is_probe && (history->exit_status !=
PCMK_OCF_OK)
3775 crm_notice(
"If it is not possible for %s to run on %s, see " 3778 history->rsc->id, pcmk__node_name(history->node));
3781 record_failed_op(history);
3784 free(last_change_s);
3786 if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3790 *on_fail = config_on_fail;
3795 "__stop_fail__", history->rsc->cluster);
3798 unpack_migrate_to_failure(history);
3801 unpack_migrate_from_failure(history);
3827 pcmk__rsc_trace(history->rsc,
"Leaving %s stopped", history->rsc->id);
3832 set_active(history->rsc);
3836 "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3838 pcmk__btoa(history->node->details->unclean),
3843 && (history->rsc->next_role < fail_role)) {
3848 ban_from_all_nodes(history->rsc);
3862 block_if_unrecoverable(
struct action_history *history)
3864 char *last_change_s = NULL;
3869 if (
pe_can_fence(history->node->details->data_set, history->node)) {
3873 last_change_s = last_change_str(history->xml);
3875 "because %s on %s failed (%s%s%s) at %s " 3877 history->rsc->id, history->task,
3878 pcmk__node_name(history->node),
3879 services_ocf_exitcode_str(history->exit_status),
3880 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
3881 pcmk__s(history->exit_reason,
""),
3882 last_change_s, history->exit_status, history->id);
3884 free(last_change_s);
3900 remap_because(
struct action_history *history,
const char **why,
int value,
3903 if (history->execution_status != value) {
3904 history->execution_status = value;
3932 remap_operation(
struct action_history *history,
3935 bool is_probe =
false;
3936 int orig_exit_status = history->exit_status;
3937 int orig_exec_status = history->execution_status;
3938 const char *why = NULL;
3939 const char *task = history->task;
3943 if (history->exit_status != orig_exit_status) {
3944 why =
"degraded result";
3945 if (!expired && (!history->node->details->shutdown
3946 || history->node->details->online)) {
3947 record_failed_op(history);
3951 if (!pcmk__is_bundled(history->rsc)
3957 why =
"equivalent probe result";
3965 switch (history->execution_status) {
3974 "node-fatal error");
3986 if (history->expected_exit_status < 0) {
3996 "obsolete history format");
3998 "(corrupt or obsolete CIB?)",
3999 history->key, pcmk__node_name(history->node));
4001 }
else if (history->exit_status == history->expected_exit_status) {
4007 "%s on %s: expected %d (%s), got %d (%s%s%s)",
4008 history->key, pcmk__node_name(history->node),
4009 history->expected_exit_status,
4010 services_ocf_exitcode_str(history->expected_exit_status),
4011 history->exit_status,
4012 services_ocf_exitcode_str(history->exit_status),
4013 (pcmk__str_empty(history->exit_reason)?
"" :
": "),
4014 pcmk__s(history->exit_reason,
""));
4017 switch (history->exit_status) {
4021 char *last_change_s = last_change_str(history->xml);
4025 "Probe found %s active on %s at %s",
4026 history->rsc->id, pcmk__node_name(history->node),
4028 free(last_change_s);
4034 || (history->expected_exit_status == history->exit_status)
4051 && (history->exit_status != history->expected_exit_status)) {
4052 char *last_change_s = last_change_str(history->xml);
4056 "Probe found %s active and promoted on %s at %s",
4058 pcmk__node_name(history->node), last_change_s);
4059 free(last_change_s);
4062 || (history->exit_status == history->expected_exit_status)) {
4080 guint interval_ms = 0;
4084 if (interval_ms == 0) {
4086 block_if_unrecoverable(history);
4101 block_if_unrecoverable(history);
4108 char *last_change_s = last_change_str(history->xml);
4110 crm_info(
"Treating unknown exit status %d from %s of %s " 4111 "on %s at %s as failure",
4112 history->exit_status, task, history->rsc->id,
4113 pcmk__node_name(history->node), last_change_s);
4115 "unknown exit status");
4116 free(last_change_s);
4124 "Remapped %s result from [%s: %s] to [%s: %s] " 4126 history->key, pcmk_exec_status_str(orig_exec_status),
4128 pcmk_exec_status_str(history->execution_status),
4135 should_clear_for_param_change(
const xmlNode *xml_op,
const char *task,
4152 switch (digest_data->
rc) {
4154 crm_trace(
"Resource %s history entry %s on %s" 4155 " has no digest to compare",
4156 rsc->
id, pcmk__xe_history_key(xml_op),
4185 should_ignore_failure_timeout(
const pcmk_resource_t *rsc,
const char *task,
4186 guint interval_ms,
bool is_last_failure)
4210 && (interval_ms != 0)
4216 if (is_last_failure) {
4217 crm_info(
"Waiting to clear monitor failure for remote node %s" 4218 " until fencing has occurred", rsc->
id);
4245 check_operation_expiry(
struct action_history *history)
4247 bool expired =
false;
4248 bool is_last_failure =
pcmk__ends_with(history->id,
"_last_failure_0");
4249 time_t last_run = 0;
4250 int unexpired_fail_count = 0;
4251 const char *clear_reason = NULL;
4255 "Resource history entry %s on %s is not expired: " 4256 "Not Installed does not expire",
4257 history->id, pcmk__node_name(history->node));
4261 if ((history->rsc->failure_timeout > 0)
4270 time_t last_failure = 0;
4273 if ((now >= (last_run + history->rsc->failure_timeout))
4274 && !should_ignore_failure_timeout(history->rsc, history->task,
4275 history->interval_ms,
4287 crm_trace(
"%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" 4288 " last-failure@%lld",
4289 history->id, (
long long) last_run, (expired?
"" :
"not "),
4290 (
long long) now, unexpired_fail_count,
4291 history->rsc->failure_timeout, (
long long) last_failure);
4292 last_failure += history->rsc->failure_timeout + 1;
4293 if (unexpired_fail_count && (now < last_failure)) {
4295 "fail count expiration");
4304 if (unexpired_fail_count == 0) {
4306 clear_reason =
"it expired";
4316 "Resource history entry %s on %s is not " 4317 "expired: Unexpired fail count",
4318 history->id, pcmk__node_name(history->node));
4322 }
else if (is_last_failure
4323 && (history->rsc->remote_reconnect_ms != 0)) {
4327 clear_reason =
"reconnect interval is set";
4331 if (!expired && is_last_failure
4332 && should_clear_for_param_change(history->xml, history->task,
4333 history->rsc, history->node)) {
4334 clear_reason =
"resource parameters have changed";
4337 if (clear_reason != NULL) {
4342 clear_reason, history->rsc->cluster);
4346 && (history->rsc->remote_reconnect_ms != 0)) {
4355 crm_info(
"Clearing %s failure will wait until any scheduled " 4356 "fencing of %s completes",
4357 history->task, history->rsc->id);
4358 order_after_remote_fencing(clear_op, history->rsc,
4359 history->rsc->cluster);
4363 if (expired && (history->interval_ms == 0)
4365 switch (history->exit_status) {
4373 "Resource history entry %s on %s is not " 4374 "expired: Probe result",
4375 history->id, pcmk__node_name(history->node));
4407 update_resource_state(
struct action_history *history,
int exit_status,
4408 const xmlNode *last_failure,
4411 bool clear_past_failure =
false;
4414 || (!pcmk__is_bundled(history->rsc)
4419 clear_past_failure =
true;
4423 if ((last_failure != NULL)
4424 && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4426 clear_past_failure =
true;
4429 set_active(history->rsc);
4434 clear_past_failure =
true;
4438 clear_past_failure =
true;
4443 clear_past_failure =
true;
4451 clear_past_failure =
true;
4458 clear_past_failure =
true;
4462 unpack_migrate_to_success(history);
4466 history->rsc->id, pcmk__node_name(history->node));
4467 set_active(history->rsc);
4470 if (!clear_past_failure) {
4480 "%s (%s) is not cleared by a completed %s",
4492 "clear past failures");
4496 if (history->rsc->remote_reconnect_ms == 0) {
4505 "clear past failures and reset remote");
4520 can_affect_state(
struct action_history *history)
4549 unpack_action_result(
struct action_history *history)
4552 &(history->execution_status)) < 0)
4558 history->id, history->rsc->id,
4559 pcmk__node_name(history->node),
4566 &(history->exit_status)) < 0)
4567 || (history->exit_status < 0) || (history->exit_status >
CRM_EX_MAX)) {
4575 history->id, history->rsc->id,
4576 pcmk__node_name(history->node),
4600 process_expired_result(
struct action_history *history,
int orig_exit_status)
4602 if (!pcmk__is_bundled(history->rsc)
4604 && (orig_exit_status != history->expected_exit_status)) {
4609 crm_trace(
"Ignoring resource history entry %s for probe of %s on %s: " 4610 "Masked failure expired",
4611 history->id, history->rsc->id,
4612 pcmk__node_name(history->node));
4616 if (history->exit_status == history->expected_exit_status) {
4620 if (history->interval_ms == 0) {
4621 crm_notice(
"Ignoring resource history entry %s for %s of %s on %s: " 4623 history->id, history->task, history->rsc->id,
4624 pcmk__node_name(history->node));
4628 if (history->node->details->online && !history->node->details->unclean) {
4639 crm_notice(
"Rescheduling %s-interval %s of %s on %s " 4640 "after failure expired",
4642 history->rsc->id, pcmk__node_name(history->node));
4644 "calculated-failure-timeout");
4661 mask_probe_failure(
struct action_history *history,
int orig_exit_status,
4662 const xmlNode *last_failure,
4671 crm_notice(
"Treating probe result '%s' for %s on %s as 'not running'",
4672 services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4673 pcmk__node_name(history->node));
4674 update_resource_state(history, history->expected_exit_status, last_failure,
4678 record_failed_op(history);
4680 "masked-probe-failure", history->rsc->cluster);
4696 failure_is_newer(
const struct action_history *history,
4697 const xmlNode *last_failure)
4699 guint failure_interval_ms = 0U;
4700 long long failure_change = 0LL;
4701 long long this_change = 0LL;
4703 if (last_failure == NULL) {
4707 if (!pcmk__str_eq(history->task,
4714 &failure_interval_ms) !=
pcmk_ok)
4715 || (history->interval_ms != failure_interval_ms)) {
4724 || (failure_change < this_change)) {
4739 process_pending_action(
struct action_history *history,
4740 const xmlNode *last_failure)
4751 if (failure_is_newer(history, last_failure)) {
4757 set_active(history->rsc);
4763 && history->node->details->unclean) {
4767 const char *migrate_target = NULL;
4778 if (history->rsc->pending_task != NULL) {
4792 history->rsc->pending_task = strdup(
"probe");
4793 history->rsc->pending_node = history->node;
4796 history->rsc->pending_task = strdup(history->task);
4797 history->rsc->pending_node = history->node;
4806 bool expired =
false;
4811 struct action_history history = {
4818 CRM_CHECK(rsc && node && xml_op,
return);
4820 history.id = pcmk__xe_id(xml_op);
4821 if (history.id == NULL) {
4823 "without ID", rsc->
id, pcmk__node_name(node));
4829 if (history.task == NULL) {
4832 history.id, rsc->
id, pcmk__node_name(node));
4836 if (!can_affect_state(&history)) {
4838 "Ignoring resource history entry %s for %s on %s " 4839 "with irrelevant action '%s'",
4840 history.id, rsc->
id, pcmk__node_name(node),
4845 if (unpack_action_result(&history) !=
pcmk_rc_ok) {
4850 history.key = pcmk__xe_history_key(xml_op);
4854 history.id, history.task, history.call_id,
4855 pcmk__node_name(node),
4856 pcmk_exec_status_str(history.execution_status),
4861 "%s is running on %s, which is unclean (further action " 4862 "depends on value of stop's on-fail attribute)",
4863 rsc->
id, pcmk__node_name(node));
4866 expired = check_operation_expiry(&history);
4867 old_rc = history.exit_status;
4869 remap_operation(&history, on_fail, expired);
4871 if (expired && (process_expired_result(&history, old_rc) ==
pcmk_rc_ok)) {
4876 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4884 switch (history.execution_status) {
4886 process_pending_action(&history, *last_failure);
4890 update_resource_state(&history, history.exit_status, *last_failure,
4895 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4897 crm_warn(
"Cannot ignore failed %s of %s on %s: " 4898 "Resource agent doesn't exist " 4899 CRM_XS " status=%d rc=%d id=%s",
4900 history.task, rsc->
id, pcmk__node_name(node),
4901 history.execution_status, history.exit_status,
4910 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4911 last_failure, on_fail);
4915 if (pcmk__is_pacemaker_remote_node(node)
4941 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4946 char *last_change_s = last_change_str(xml_op);
4948 crm_warn(
"Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " 4950 history.task, services_ocf_exitcode_str(history.exit_status),
4951 (pcmk__str_empty(history.exit_reason)?
"" :
": "),
4952 pcmk__s(history.exit_reason,
""), rsc->
id,
4953 pcmk__node_name(node), last_change_s, history.id);
4954 free(last_change_s);
4956 update_resource_state(&history, history.expected_exit_status,
4957 *last_failure, on_fail);
4961 record_failed_op(&history);
4965 *on_fail = failure_strategy;
4969 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4970 last_failure, on_fail);
4973 uint8_t log_level = LOG_ERR;
4976 log_level = LOG_NOTICE;
4979 "Preventing %s from restarting on %s because " 4980 "of hard failure (%s%s%s) " CRM_XS " %s",
4981 parent->id, pcmk__node_name(node),
4982 services_ocf_exitcode_str(history.exit_status),
4983 (pcmk__str_empty(history.exit_reason)?
"" :
": "),
4984 pcmk__s(history.exit_reason,
""), history.id);
4990 "of fatal failure (%s%s%s) " CRM_XS " %s",
4992 services_ocf_exitcode_str(history.exit_status),
4993 (pcmk__str_empty(history.exit_reason)?
"" :
": "),
4994 pcmk__s(history.exit_reason,
""), history.id);
5002 rsc->
id, pcmk__node_name(node), history.id,
5008 add_node_attrs(
const xmlNode *xml_obj,
pcmk_node_t *node,
bool overwrite,
5011 const char *cluster_name = NULL;
5059 }
else if (cluster_name) {
5068 extract_operations(
const char *node,
const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5071 int stop_index = -1;
5072 int start_index = -1;
5074 xmlNode *rsc_op = NULL;
5076 GList *gIter = NULL;
5077 GList *op_list = NULL;
5078 GList *sorted_op_list = NULL;
5082 sorted_op_list = NULL;
5085 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5090 op_list = g_list_prepend(op_list, rsc_op);
5094 if (op_list == NULL) {
5102 if (active_filter == FALSE) {
5103 return sorted_op_list;
5110 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5111 xmlNode *rsc_op = (xmlNode *) gIter->data;
5115 if (start_index < stop_index) {
5116 crm_trace(
"Skipping %s: not active", pcmk__xe_id(rsc_entry));
5119 }
else if (counter < start_index) {
5120 crm_trace(
"Skipping %s: old", pcmk__xe_id(rsc_op));
5123 op_list = g_list_append(op_list, rsc_op);
5126 g_list_free(sorted_op_list);
5134 GList *output = NULL;
5135 GList *intermediate = NULL;
5137 xmlNode *tmp = NULL;
5143 xmlNode *node_state = NULL;
5148 node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5158 if(this_node == NULL) {
5162 }
else if (pcmk__is_pacemaker_remote_node(this_node)) {
5163 determine_remote_online_status(
scheduler, this_node);
5166 determine_online_status(node_state, this_node,
scheduler);
5175 xmlNode *lrm_rsc = NULL;
5183 lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5193 intermediate = extract_operations(
uname, rsc_id, lrm_rsc, active_filter);
5194 output = g_list_concat(output, intermediate);
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
#define CRM_CHECK(expr, failure_action)
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
xmlNode * pcmk__xml_copy(xmlNode *parent, xmlNode *src)
enum pe_quorum_policy no_quorum_policy
bool pe__shutdown_requested(const pcmk_node_t *node)
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Service failed and possibly in promoted role.
#define crm_notice(fmt, args...)
#define PCMK__XE_LRM_RESOURCES
No connection to executor.
pcmk_scheduler_t * cluster
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
#define PCMK_OPT_STONITH_ENABLED
#define PCMK__XE_TICKET_STATE
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Service active and promoted.
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
pcmk_node_t * partial_migration_target
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags)
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
int pcmk__scan_min_int(const char *text, int *result, int minimum)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
#define stop_action(rsc, node, optional)
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
#define PCMK_OPT_CONCURRENT_FENCING
#define PCMK_XE_PRIMITIVE
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
#define pcmk__config_warn(fmt...)
#define pcmk__rsc_trace(rsc, fmt, args...)
pcmk__op_digest_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Match only clones and their instances, by either clone or instance ID.
#define PCMK_XA_RESOURCE_DISCOVERY
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
int priority_fencing_delay
#define pcmk__rsc_info(rsc, fmt, args...)
#define PCMK_OPT_SHUTDOWN_LOCK
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
enum rsc_role_e next_role
bool pcmk_is_probe(const char *task, guint interval)
Check whether an action name and interval represent a probe.
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
#define pcmk__config_err(fmt...)
#define PCMK_ACTION_META_DATA
#define PCMK_ACTION_MONITOR
#define PCMK_XA_EXIT_REASON
#define PCMK_XA_NO_QUORUM_PANIC
#define set_config_flag(scheduler, option, flag)
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
#define PCMK_ACTION_MIGRATE_TO
#define SUB_XPATH_LRM_RSC_OP
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
#define PCMK_OPT_CLUSTER_NAME
Necessary CIB secrets are unavailable.
const char * pcmk_on_fail_text(enum action_fail_response on_fail)
Get string equivalent of a failure handling type.
#define PCMK__XA_OP_RESTART_DIGEST
#define CRM_LOG_ASSERT(expr)
Service promoted but more likely to fail soon.
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
enum crm_ais_msg_types type
#define CRMD_JOINSTATE_NACK
#define CRM_ATTR_CLUSTER_NAME
Ensure crm_exit_t can hold this.
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
void pcmk__validate_cluster_options(GHashTable *options)
const char * pcmk__cluster_option(GHashTable *options, const char *name)
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Action did not complete in time.
#define PCMK_NODE_ATTR_MAINTENANCE
#define PCMK_OPT_STOP_ORPHAN_ACTIONS
pcmk_scheduler_t * data_set
pcmk_resource_t * container
gboolean remote_was_fenced
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Execution failed, do not retry on node.
bool pcmk__ends_with(const char *s, const char *match)
#define PCMK_META_REMOTE_CONNECT_TIMEOUT
#define PCMK_OPT_STONITH_ACTION
#define PCMK_XA_OPERATION
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
gboolean remote_requires_reset
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
No fence device is configured for target.
#define PCMK_OPT_ENABLE_STARTUP_PROBES
#define PCMK_META_REMOTE_ALLOW_MIGRATE
#define PCMK_META_IS_MANAGED
#define PCMK__XE_TRANSIENT_ATTRIBUTES
int pcmk__effective_rc(int rc)
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Check whether an action history entry represents a probe.
#define PCMK__META_MIGRATE_TARGET
gboolean remote_maintenance
#define PCMK_META_REMOTE_ADDR
#define pcmk__rsc_debug(rsc, fmt, args...)
#define demote_action(rsc, node, optional)
char int pcmk_parse_interval_spec(const char *input, guint *result_ms)
Parse milliseconds from a Pacemaker interval specification.
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
#define PCMK_OPT_PLACEMENT_STRATEGY
#define PCMK_ACTION_DEMOTE
#define PCMK__XE_LRM_RESOURCE
#define PCMK__XA_TRANSITION_KEY
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
GList * dangling_migrations
#define CRMD_JOINSTATE_DOWN
Maximum value for this enum.
#define crm_warn(fmt, args...)
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id_rh)
guint remote_reconnect_ms
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
const char * crm_exit_str(crm_exit_t exit_code)
char * clone_zero(const char *last_rsc_id)
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
#define crm_debug(fmt, args...)
Used only to initialize variables.
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
#define PCMK_OPT_STOP_ALL_RESOURCES
const char * pe_base_name_end(const char *id)
xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v)
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Parameter invalid (in local context)
#define pcmk__sched_err(fmt...)
#define PCMK_XE_UTILIZATION
int char2score(const char *score)
Get the integer value of a score string.
Parameter invalid (inherently)
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Check whether an action history entry represents a maskable probe.
#define crm_trace(fmt, args...)
#define CRMD_JOINSTATE_MEMBER
#define do_crm_log(level, fmt, args...)
Log a message.
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
bool xml_contains_remote_node(xmlNode *xml)
#define PCMK_VALUE_MEMBER
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
#define PCMK_OPT_MAINTENANCE_MODE
#define PCMK_META_REMOTE_NODE
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear)
const char * stonith_action
struct pe_node_shared_s * details
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
#define PCMK_OPT_SHUTDOWN_LOCK_LIMIT
#define crm_log_xml_debug(xml, text)
#define PCMK_XE_CLUSTER_PROPERTY_SET
#define PCMK_ACTION_START
#define PCMK_VALUE_IGNORE
#define PCMK_OPT_PRIORITY_FENCING_DELAY
void pcmk__str_update(char **str, const char *value)
Wrappers for and extensions to libxml2.
#define PCMK_OPT_STARTUP_FENCING
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
#define PCMK_META_TARGET_ROLE
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Action completed, result is known.
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
GHashTable * pe__node_list2table(const GList *list)
#define PCMK_NODE_ATTR_TERMINATE
#define PCMK__ACTION_POWEROFF
#define pcmk__set_rsc_flags(resource, flags_to_set)
Execution failed, do not retry anywhere.
#define PCMK_NODE_ATTR_STANDBY
void pe__free_digests(gpointer ptr)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
#define PCMK_OPT_NODE_PENDING_TIMEOUT
Dependencies not available locally.
#define PCMK_OPT_START_FAILURE_IS_FATAL
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
enum pe_obj_types variant
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
#define pcmk__str_copy(str)
#define pcmk__warn_once(wo_flag, fmt...)
const char * placement_strategy
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
#define PCMK_VALUE_FENCE_LEGACY
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
#define PCMK_XE_META_ATTRIBUTES
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
gboolean rsc_discovery_enabled
#define PCMK_VALUE_ONLINE
Requested action not implemented.
#define PCMK_OPT_STONITH_TIMEOUT
int crm_str_to_boolean(const char *s, int *ret)
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
int pe__target_rc_from_xml(const xmlNode *xml_op)
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Service active but more likely to fail soon.
#define PCMK_META_INTERVAL
#define PCMK_XA_LAST_RC_CHANGE
Agent does not implement requested action.
#define PCMK_XE_FENCING_LEVEL
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
pcmk__action_result_t result
#define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
guint node_pending_timeout
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
#define PCMK_OPT_SYMMETRIC_CLUSTER
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
pcmk_scheduler_t * scheduler
#define PCMK__XE_LRM_RSC_OP
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
#define PCMK_META_REMOTE_PORT
#define PCMK_OPT_STOP_ORPHAN_RESOURCES
#define PCMK_ACTION_MIGRATE_FROM
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
#define PCMK__XA_OP_STATUS
#define pcmk__sched_warn(fmt...)
#define PCMK_META_ON_FAIL
#define crm_log_xml_info(xml, text)
#define PCMK_ACTION_PROMOTE
#define PCMK_OPT_NO_QUORUM_POLICY
#define PCMK_OPT_HAVE_WATCHDOG
#define CRMD_JOINSTATE_PENDING
#define PCMK__XE_NODE_STATE
#define PCMK_XA_LAST_GRANTED
CRM_TRACE_INIT_DATA(pe_status)
Agent or dependency not available locally.
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
GHashTable * digest_cache
#define pcmk__set_action_flags(action, flags_to_set)
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
#define PCMK__OPT_REMOVE_AFTER_STOP
void destroy_ticket(gpointer data)
void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
const char * pcmk__readable_interval(guint interval_ms)
pcmk_node_t * pending_node
#define SUB_XPATH_LRM_RESOURCE
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
gboolean crm_is_true(const char *s)
#define PCMK__META_CONTAINER
#define CRM_ATTR_SITE_NAME
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler)
Resource role is unknown.
#define PCMK__META_MIGRATE_SOURCE
#define PCMK_VALUE_FREEZE
Action cannot be attempted (e.g. shutdown)
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
xmlNode * pcmk__xe_create(xmlNode *parent, const char *name)
#define pcmk__assert_alloc(nmemb, size)
time_t get_effective_time(pcmk_scheduler_t *scheduler)
void freeXpathObject(xmlXPathObjectPtr xpathObj)
#define PCMK_VALUE_OFFLINE
#define PCMK_XE_INSTANCE_ATTRIBUTES
xmlNode * pcmk__xe_next_same(const xmlNode *node)
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
#define XPATH_ENABLE_UNFENCING
#define PCMK_VALUE_REMOTE
Execution failed, may be retried.
#define crm_info(fmt, args...)
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
#define pcmk__set_scheduler_flags(scheduler, flags_to_set)
GHashTable * template_rsc_sets
#define PCMK_VALUE_DEMOTE
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
#define PCMK__XA_NODE_FENCED
char * clone_strip(const char *last_rsc_id)
#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
enum pcmk__digest_result rc
#define PCMK__XA_NODE_IN_MAINTENANCE
pcmk_resource_t * remote_rsc
pcmk_node_t * partial_migration_source
#define PCMK_ACTION_NOTIFY
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
GHashTable * allowed_nodes
Where resource is running.