root/lib/pengine/unpack.c


DEFINITIONS

This source file includes the following definitions.
  1. is_dangling_guest_node
  2. pe_fence_node
  3. set_if_xpath
  4. unpack_config
  5. pe_create_node
  6. expand_remote_rsc_meta
  7. handle_startup_fencing
  8. unpack_nodes
  9. unpack_launcher
  10. unpack_remote_nodes
  11. link_rsc2remotenode
  12. unpack_resources
  13. pcmk__validate_fencing_topology
  14. unpack_tags
  15. unpack_ticket_state
  16. unpack_handle_remote_attrs
  17. unpack_transient_attributes
  18. unpack_node_state
  19. unpack_node_history
  20. unpack_status
  21. unpack_node_member
  22. unpack_node_online
  23. unpack_node_terminate
  24. determine_online_status_no_fencing
  25. pending_too_long
  26. determine_online_status_fencing
  27. determine_remote_online_status
  28. determine_online_status
  29. pe_base_name_end
  30. clone_strip
  31. clone_zero
  32. create_fake_resource
  33. create_anonymous_orphan
  34. find_anonymous_clone
  35. unpack_find_resource
  36. process_orphan_resource
  37. process_rsc_state
  38. process_recurring
  39. calculate_active_ops
  40. unpack_shutdown_lock
  41. unpack_lrm_resource
  42. handle_removed_launched_resources
  43. unpack_node_lrm
  44. set_active
  45. set_node_score
  46. find_lrm_op
  47. find_lrm_resource
  48. unknown_on_node
  49. monitor_not_running_after
  50. non_monitor_after
  51. newer_state_after_migrate
  52. get_migration_node_names
  53. add_dangling_migration
  54. unpack_migrate_to_success
  55. unpack_migrate_to_failure
  56. unpack_migrate_from_failure
  57. record_failed_op
  58. last_change_str
  59. ban_from_all_nodes
  60. unpack_failure_handling
  61. unpack_rsc_op_failure
  62. block_if_unrecoverable
  63. remap_because
  64. remap_operation
  65. should_clear_for_param_change
  66. order_after_remote_fencing
  67. should_ignore_failure_timeout
  68. check_operation_expiry
  69. pe__target_rc_from_xml
  70. update_resource_state
  71. can_affect_state
  72. unpack_action_result
  73. process_expired_result
  74. mask_probe_failure
  75. failure_is_newer
  76. process_pending_action
  77. unpack_rsc_op
  78. insert_attr
  79. add_node_attrs
  80. extract_operations
  81. find_operations

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdio.h>
  13 #include <string.h>
  14 #include <glib.h>
  15 #include <time.h>
  16 
  17 #include <crm/crm.h>
  18 #include <crm/services.h>
  19 #include <crm/common/xml.h>
  20 #include <crm/common/xml_internal.h>
  21 
  22 #include <crm/common/util.h>
  23 #include <crm/pengine/rules.h>
  24 #include <crm/pengine/internal.h>
  25 #include <pe_status_private.h>
  26 
  27 CRM_TRACE_INIT_DATA(pe_status);
  28 
  29 // A (parsed) resource action history entry
  30 struct action_history {
  31     pcmk_resource_t *rsc;       // Resource that history is for
   32     pcmk_node_t *node;          // Node that history is for
   33     xmlNode *xml;               // History entry XML
  34 
  35     // Parsed from entry XML
  36     const char *id;           // XML ID of history entry
  37     const char *key;          // Operation key of action
  38     const char *task;         // Action name
  39     const char *exit_reason;  // Exit reason given for result
  40     guint interval_ms;        // Action interval
  41     int call_id;              // Call ID of action
  42     int expected_exit_status; // Expected exit status of action
  43     int exit_status;          // Actual exit status of action
  44     int execution_status;     // Execution status of action
  45 };
  46 
  47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
   48  * using pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
  49  * flag is stringified more readably in log messages.
  50  */
  51 #define set_config_flag(scheduler, option, flag) do {                         \
  52         GHashTable *config_hash = (scheduler)->priv->options;                 \
  53         const char *scf_value = pcmk__cluster_option(config_hash, (option));  \
  54                                                                               \
  55         if (scf_value != NULL) {                                              \
  56             if (crm_is_true(scf_value)) {                                     \
  57                 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,   \
  58                                     LOG_TRACE, "Scheduler",                   \
  59                                     crm_system_name, (scheduler)->flags,      \
  60                                     (flag), #flag);                           \
  61             } else {                                                          \
  62                 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
  63                                     LOG_TRACE, "Scheduler",                   \
  64                                     crm_system_name, (scheduler)->flags,      \
  65                                     (flag), #flag);                           \
  66             }                                                                 \
  67         }                                                                     \
  68     } while(0)
  69 
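      /* Illustrative usage, as seen later in unpack_config(): the macro maps a
       * cluster option onto a scheduler flag, for example
       *   set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
       *                   pcmk__sched_fencing_enabled);
       */
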
  70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
  71                           xmlNode *xml_op, xmlNode **last_failure,
  72                           enum pcmk__on_fail *failed);
  73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
  74                                            pcmk_node_t *this_node);
  75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
  76                            bool overwrite, pcmk_scheduler_t *scheduler);
  77 static void determine_online_status(const xmlNode *node_state,
  78                                     pcmk_node_t *this_node,
  79                                     pcmk_scheduler_t *scheduler);
  80 
  81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
  82                             pcmk_scheduler_t *scheduler);
  83 
  84 
  85 /*!
  86  * \internal
  87  * \brief Check whether a node is a dangling guest node
  88  *
  89  * \param[in] node  Node to check
  90  *
  91  * \return true if \p node had a Pacemaker Remote connection resource with a
  92  *         launcher that was removed from the CIB, otherwise false.
  93  */
  94 static bool
  95 is_dangling_guest_node(pcmk_node_t *node)
  96 {
  97     return pcmk__is_pacemaker_remote_node(node)
  98            && (node->priv->remote != NULL)
  99            && (node->priv->remote->priv->launcher == NULL)
 100            && pcmk_is_set(node->priv->remote->flags,
 101                           pcmk__rsc_removed_launched);
 102 }
 103 
 104 /*!
 105  * \brief Schedule a fence action for a node
 106  *
 107  * \param[in,out] scheduler       Scheduler data
 108  * \param[in,out] node            Node to fence
 109  * \param[in]     reason          Text description of why fencing is needed
 110  * \param[in]     priority_delay  Whether to consider
 111  *                                \c PCMK_OPT_PRIORITY_FENCING_DELAY
 112  */
 113 void
 114 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
 115               const char *reason, bool priority_delay)
 116 {
 117     CRM_CHECK(node, return);
 118 
 119     if (pcmk__is_guest_or_bundle_node(node)) {
 120         // Fence a guest or bundle node by marking its launcher as failed
 121         pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
 122 
 123         if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
 124             if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
 125                 crm_notice("Not fencing guest node %s "
 126                            "(otherwise would because %s): "
 127                            "its guest resource %s is unmanaged",
 128                            pcmk__node_name(node), reason, rsc->id);
 129             } else {
 130                 pcmk__sched_warn(scheduler,
 131                                  "Guest node %s will be fenced "
 132                                  "(by recovering its guest resource %s): %s",
 133                                  pcmk__node_name(node), rsc->id, reason);
 134 
 135                 /* We don't mark the node as unclean because that would prevent the
 136                  * node from running resources. We want to allow it to run resources
 137                  * in this transition if the recovery succeeds.
 138                  */
 139                 pcmk__set_node_flags(node, pcmk__node_remote_reset);
 140                 pcmk__set_rsc_flags(rsc,
 141                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
 142             }
 143         }
 144 
 145     } else if (is_dangling_guest_node(node)) {
 146         crm_info("Cleaning up dangling connection for guest node %s: "
 147                  "fencing was already done because %s, "
 148                  "and guest resource no longer exists",
 149                  pcmk__node_name(node), reason);
 150         pcmk__set_rsc_flags(node->priv->remote,
 151                             pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
 152 
 153     } else if (pcmk__is_remote_node(node)) {
 154         pcmk_resource_t *rsc = node->priv->remote;
 155 
 156         if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
 157             crm_notice("Not fencing remote node %s "
 158                        "(otherwise would because %s): connection is unmanaged",
 159                        pcmk__node_name(node), reason);
 160         } else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) {
 161             pcmk__set_node_flags(node, pcmk__node_remote_reset);
 162             pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
 163                              pcmk__node_name(node),
 164                              pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 165                              reason);
 166         }
 167         node->details->unclean = TRUE;
 168         // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
 169         pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
 170 
 171     } else if (node->details->unclean) {
 172         crm_trace("Cluster node %s %s because %s",
 173                   pcmk__node_name(node),
 174                   pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
 175                   reason);
 176 
 177     } else {
 178         pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
 179                          pcmk__node_name(node),
 180                          pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 181                          reason);
 182         node->details->unclean = TRUE;
 183         pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
 184     }
 185 }
 186 
 187 // @TODO xpaths can't handle templates, rules, or id-refs
 188 
 189 // nvpair with provides or requires set to unfencing
 190 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR           \
 191     "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'"   \
 192     "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "   \
 193     "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
 194 
 195 // unfencing in rsc_defaults or any resource
 196 #define XPATH_ENABLE_UNFENCING \
 197     "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
 198     "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR             \
 199     "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
 200     "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
 201 
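      /* For illustration (hypothetical CIB snippet, not taken from this file):
       * the XPath above is intended to match a meta-attribute such as
       *   <nvpair id="X" name="requires" value="unfencing"/>
       * whether it appears under a resource's meta_attributes or under
       * rsc_defaults.
       */
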
 202 static void
 203 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
 204 {
 205     xmlXPathObjectPtr result = NULL;
 206 
 207     if (!pcmk_is_set(scheduler->flags, flag)) {
 208         result = xpath_search(scheduler->input, xpath);
 209         if (result && (numXpathResults(result) > 0)) {
 210             pcmk__set_scheduler_flags(scheduler, flag);
 211         }
 212         freeXpathObject(result);
 213     }
 214 }
 215 
 216 gboolean
 217 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
 218 {
 219     const char *value = NULL;
 220     GHashTable *config_hash = pcmk__strkey_table(free, free);
 221 
 222     pe_rule_eval_data_t rule_data = {
 223         .node_hash = NULL,
 224         .now = scheduler->priv->now,
 225         .match_data = NULL,
 226         .rsc_data = NULL,
 227         .op_data = NULL
 228     };
 229 
 230     scheduler->priv->options = config_hash;
 231 
 232     pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
 233                                config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
 234                                scheduler);
 235 
 236     pcmk__validate_cluster_options(config_hash);
 237 
 238     set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
 239                     pcmk__sched_probe_resources);
 240     if (!pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) {
 241         crm_info("Startup probes: disabled (dangerous)");
 242     }
 243 
 244     value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
 245     if (value && crm_is_true(value)) {
 246         crm_info("Watchdog-based self-fencing will be performed via SBD if "
 247                  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
 248                  " is nonzero");
 249         pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
 250     }
 251 
 252     /* Set certain flags via xpath here, so they can be used before the relevant
 253      * configuration sections are unpacked.
 254      */
 255     set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
 256                  scheduler);
 257 
 258     value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
 259     pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));
 260 
 261     crm_debug("Default fencing action timeout: %s",
 262               pcmk__readable_interval(scheduler->priv->fence_timeout_ms));
 263 
 264     set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
 265                     pcmk__sched_fencing_enabled);
 266     if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
 267         crm_debug("STONITH of failed nodes is enabled");
 268     } else {
 269         crm_debug("STONITH of failed nodes is disabled");
 270     }
 271 
 272     scheduler->priv->fence_action =
 273         pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_ACTION);
 274     crm_trace("STONITH will %s nodes", scheduler->priv->fence_action);
 275 
 276     set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
 277                     pcmk__sched_concurrent_fencing);
 278     if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
 279         crm_debug("Concurrent fencing is enabled");
 280     } else {
 281         crm_debug("Concurrent fencing is disabled");
 282     }
 283 
 284     value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
 285     if (value) {
 286         pcmk_parse_interval_spec(value,
 287                                  &(scheduler->priv->priority_fencing_ms));
 288         crm_trace("Priority fencing delay is %s",
 289                   pcmk__readable_interval(scheduler->priv->priority_fencing_ms));
 290     }
 291 
 292     set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
 293                     pcmk__sched_stop_all);
 294     crm_debug("Stop all active resources: %s",
 295               pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
 296 
 297     set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
 298                     pcmk__sched_symmetric_cluster);
 299     if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
 300         crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
 301     }
 302 
 303     value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
 304 
 305     if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
 306         scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
 307 
 308     } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
 309         scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
 310 
 311     } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
 312         scheduler->no_quorum_policy = pcmk_no_quorum_demote;
 313 
 314     } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
 315                                     PCMK_VALUE_FENCE_LEGACY, NULL)) {
 316         if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
 317             int do_panic = 0;
 318 
 319             crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
 320                                   &do_panic);
 321             if (do_panic
 322                 || pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) {
 323                 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
 324             } else {
 325                 crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 326                            " to 'stop': cluster has never had quorum");
 327                 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 328             }
 329         } else {
 330             pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 331                              " to 'stop' because fencing is disabled");
 332             scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 333         }
 334 
 335     } else {
 336         scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 337     }
 338 
 339     switch (scheduler->no_quorum_policy) {
 340         case pcmk_no_quorum_freeze:
 341             crm_debug("On loss of quorum: Freeze resources");
 342             break;
 343         case pcmk_no_quorum_stop:
 344             crm_debug("On loss of quorum: Stop ALL resources");
 345             break;
 346         case pcmk_no_quorum_demote:
 347             crm_debug("On loss of quorum: "
 348                       "Demote promotable resources and stop other resources");
 349             break;
 350         case pcmk_no_quorum_fence:
 351             crm_notice("On loss of quorum: Fence all remaining nodes");
 352             break;
 353         case pcmk_no_quorum_ignore:
 354             crm_notice("On loss of quorum: Ignore");
 355             break;
 356     }
 357 
 358     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
 359                     pcmk__sched_stop_removed_resources);
 360     if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
 361         crm_trace("Orphan resources are stopped");
 362     } else {
 363         crm_trace("Orphan resources are ignored");
 364     }
 365 
 366     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
 367                     pcmk__sched_cancel_removed_actions);
 368     if (pcmk_is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
 369         crm_trace("Orphan resource actions are stopped");
 370     } else {
 371         crm_trace("Orphan resource actions are ignored");
 372     }
 373 
 374     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 375                     pcmk__sched_in_maintenance);
 376     crm_trace("Maintenance mode: %s",
 377               pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
 378 
 379     set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
 380                     pcmk__sched_start_failure_fatal);
 381     if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
 382         crm_trace("Start failures are always fatal");
 383     } else {
 384         crm_trace("Start failures are handled by failcount");
 385     }
 386 
 387     if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
 388         set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
 389                         pcmk__sched_startup_fencing);
 390     }
 391     if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
 392         crm_trace("Unseen nodes will be fenced");
 393     } else {
 394         pcmk__warn_once(pcmk__wo_blind,
 395                         "Blind faith: not fencing unseen nodes");
 396     }
 397 
 398     pe__unpack_node_health_scores(scheduler);
 399 
 400     scheduler->priv->placement_strategy =
 401         pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
 402     crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy);
 403 
 404     set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
 405                     pcmk__sched_shutdown_lock);
 406     if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
 407         value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
 408         pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
 409         crm_trace("Resources will be locked to nodes that were cleanly "
 410                   "shut down (locks expire after %s)",
 411                   pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
 412     } else {
 413         crm_trace("Resources will not be locked to nodes that were cleanly "
 414                   "shut down");
 415     }
 416 
 417     value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
 418     pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
 419     if (scheduler->priv->node_pending_ms == 0U) {
 420         crm_trace("Do not fence pending nodes");
 421     } else {
 422         crm_trace("Fence pending nodes after %s",
 423                   pcmk__readable_interval(scheduler->priv->node_pending_ms));
 424     }
 425 
 426     return TRUE;
 427 }
 428 
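      /* For context, a sketch of the kind of CIB content unpack_config() reads
       * (hypothetical IDs and values, not taken from this file):
       *   <cluster_property_set id="cib-bootstrap-options">
       *     <nvpair id="opt-stonith" name="stonith-enabled" value="true"/>
       *     <nvpair id="opt-quorum" name="no-quorum-policy" value="stop"/>
       *   </cluster_property_set>
       */
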
 429 /*!
 430  * \internal
 431  * \brief Create a new node object in scheduler data
 432  *
 433  * \param[in]     id         ID of new node
 434  * \param[in]     uname      Name of new node
 435  * \param[in]     type       Type of new node
 436  * \param[in]     score      Score of new node
 437  * \param[in,out] scheduler  Scheduler data
 438  *
 439  * \return Newly created node object
 440  * \note The returned object is part of the scheduler data and should not be
 441  *       freed separately.
 442  */
 443 pcmk_node_t *
 444 pe_create_node(const char *id, const char *uname, const char *type,
 445                int score, pcmk_scheduler_t *scheduler)
 446 {
 447     enum pcmk__node_variant variant = pcmk__node_variant_cluster;
 448     pcmk_node_t *new_node = NULL;
 449 
 450     if (pcmk_find_node(scheduler, uname) != NULL) {
 451         pcmk__config_warn("More than one node entry has name '%s'", uname);
 452     }
 453 
 454     if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
 455                      pcmk__str_null_matches|pcmk__str_casei)) {
 456         variant = pcmk__node_variant_cluster;
 457 
 458     } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
 459         variant = pcmk__node_variant_remote;
 460 
 461     } else {
 462         pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
 463                          pcmk__s(uname, "without name"), type);
 464         return NULL;
 465     }
 466 
 467     new_node = calloc(1, sizeof(pcmk_node_t));
 468     if (new_node == NULL) {
 469         pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
 470                         uname);
 471         return NULL;
 472     }
 473 
 474     new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment));
 475     new_node->details = calloc(1, sizeof(struct pcmk__node_details));
 476     new_node->priv = calloc(1, sizeof(pcmk__node_private_t));
 477     if ((new_node->assign == NULL) || (new_node->details == NULL)
 478         || (new_node->priv == NULL)) {
 479         free(new_node->assign);
 480         free(new_node->details);
 481         free(new_node->priv);
 482         free(new_node);
 483         pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
 484                         uname);
 485         return NULL;
 486     }
 487 
 488     crm_trace("Creating node for entry %s/%s", uname, id);
 489     new_node->assign->score = score;
 490     new_node->priv->id = id;
 491     new_node->priv->name = uname;
 492     new_node->priv->flags = pcmk__node_probes_allowed;
 493     new_node->details->online = FALSE;
 494     new_node->details->shutdown = FALSE;
 495     new_node->details->running_rsc = NULL;
 496     new_node->priv->scheduler = scheduler;
 497     new_node->priv->variant = variant;
 498     new_node->priv->attrs = pcmk__strkey_table(free, free);
 499     new_node->priv->utilization = pcmk__strkey_table(free, free);
 500     new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);
 501 
 502     if (pcmk__is_pacemaker_remote_node(new_node)) {
 503         pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
 504         pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
 505     } else {
 506         pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
 507     }
 508 
 509     scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
 510                                             pe__cmp_node_name);
 511     return new_node;
 512 }
 513 
 514 static const char *
 515 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
 516 {
 517     xmlNode *attr_set = NULL;
 518     xmlNode *attr = NULL;
 519 
 520     const char *container_id = pcmk__xe_id(xml_obj);
 521     const char *remote_name = NULL;
 522     const char *remote_server = NULL;
 523     const char *remote_port = NULL;
 524     const char *connect_timeout = "60s";
  525     const char *remote_allow_migrate = NULL;
 526     const char *is_managed = NULL;
 527 
 528     for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
 529                                          NULL, NULL);
 530          attr_set != NULL;
 531          attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {
 532 
 533         for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
 534              attr != NULL; attr = pcmk__xe_next(attr, NULL)) {
 535 
 536             const char *value = crm_element_value(attr, PCMK_XA_VALUE);
 537             const char *name = crm_element_value(attr, PCMK_XA_NAME);
 538 
 539             if (name == NULL) { // Sanity
 540                 continue;
 541             }
 542 
 543             if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
 544                 remote_name = value;
 545 
 546             } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
 547                 remote_server = value;
 548 
 549             } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
 550                 remote_port = value;
 551 
 552             } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
 553                 connect_timeout = value;
 554 
 555             } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
 556                 remote_allow_migrate = value;
 557 
 558             } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
 559                 is_managed = value;
 560             }
 561         }
 562     }
 563 
 564     if (remote_name == NULL) {
 565         return NULL;
 566     }
 567 
 568     if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
 569         return NULL;
 570     }
 571 
 572     pe_create_remote_xml(parent, remote_name, container_id,
 573                          remote_allow_migrate, is_managed,
 574                          connect_timeout, remote_server, remote_port);
 575     return remote_name;
 576 }
 577 
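      /* For illustration (hypothetical resource, not taken from this file): a
       * guest node is declared via special meta-attributes on a primitive,
       * which this function expands into an ocf:pacemaker:remote connection:
       *   <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
       *     <meta_attributes id="vm1-meta">
       *       <nvpair id="vm1-remote" name="remote-node" value="guest1"/>
       *     </meta_attributes>
       *   </primitive>
       */
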
 578 static void
 579 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
 580 {
 581     if ((new_node->priv->variant == pcmk__node_variant_remote)
 582         && (new_node->priv->remote == NULL)) {
 583         /* Ignore fencing for remote nodes that don't have a connection resource
 584          * associated with them. This happens when remote node entries get left
 585          * in the nodes section after the connection resource is removed.
 586          */
 587         return;
 588     }
 589 
 590     if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
 591         // All nodes are unclean until we've seen their status entry
 592         new_node->details->unclean = TRUE;
 593 
 594     } else {
 595         // Blind faith ...
 596         new_node->details->unclean = FALSE;
 597     }
 598 }
 599 
 600 gboolean
 601 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
 602 {
 603     xmlNode *xml_obj = NULL;
 604     pcmk_node_t *new_node = NULL;
 605     const char *id = NULL;
 606     const char *uname = NULL;
 607     const char *type = NULL;
 608 
 609     for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
 610          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {
 611 
 612         int score = 0;
 613         int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
 614 
 615         new_node = NULL;
 616 
 617         id = crm_element_value(xml_obj, PCMK_XA_ID);
 618         uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
 619         type = crm_element_value(xml_obj, PCMK_XA_TYPE);
 620         crm_trace("Processing node %s/%s", uname, id);
 621 
 622         if (id == NULL) {
 623             pcmk__config_err("Ignoring <" PCMK_XE_NODE
 624                              "> entry in configuration without id");
 625             continue;
 626         }
 627         if (rc != pcmk_rc_ok) {
 628             // Not possible with schema validation enabled
 629             pcmk__config_warn("Using 0 as score for node %s "
 630                               "because '%s' is not a valid score: %s",
 631                               pcmk__s(uname, "without name"),
 632                               crm_element_value(xml_obj, PCMK_XA_SCORE),
 633                               pcmk_rc_str(rc));
 634         }
 635         new_node = pe_create_node(id, uname, type, score, scheduler);
 636 
 637         if (new_node == NULL) {
 638             return FALSE;
 639         }
 640 
 641         handle_startup_fencing(scheduler, new_node);
 642 
 643         add_node_attrs(xml_obj, new_node, FALSE, scheduler);
 644 
 645         crm_trace("Done with node %s",
 646                   crm_element_value(xml_obj, PCMK_XA_UNAME));
 647     }
 648 
 649     return TRUE;
 650 }
 651 
 652 static void
 653 unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
 654 {
 655     const char *launcher_id = NULL;
 656 
 657     if (rsc->priv->children != NULL) {
 658         g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
 659                        scheduler);
 660         return;
 661     }
 662 
 663     launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
 664     if ((launcher_id != NULL)
 665         && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
 666         pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
 667                                                      launcher_id);
 668 
 669         if (launcher != NULL) {
 670             rsc->priv->launcher = launcher;
 671             launcher->priv->launched =
 672                 g_list_append(launcher->priv->launched, rsc);
 673             pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
 674                             rsc->id, launcher_id);
 675         } else {
 676             pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
 677                              rsc->id, launcher_id);
 678         }
 679     }
 680 }
 681 
 682 gboolean
 683 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 684 {
 685     xmlNode *xml_obj = NULL;
 686 
 687     /* Create remote nodes and guest nodes from the resource configuration
 688      * before unpacking resources.
 689      */
 690     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 691          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
 692 
 693         const char *new_node_id = NULL;
 694 
 695         /* Check for remote nodes, which are defined by ocf:pacemaker:remote
 696          * primitives.
 697          */
 698         if (xml_contains_remote_node(xml_obj)) {
 699             new_node_id = pcmk__xe_id(xml_obj);
 700             /* The pcmk_find_node() check ensures we don't iterate over an
 701              * expanded node that has already been added to the node list
 702              */
 703             if (new_node_id
 704                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 705                 crm_trace("Found remote node %s defined by resource %s",
 706                           new_node_id, pcmk__xe_id(xml_obj));
 707                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 708                                0, scheduler);
 709             }
 710             continue;
 711         }
 712 
 713         /* Check for guest nodes, which are defined by special meta-attributes
 714          * of a primitive of any type (for example, VirtualDomain or Xen).
 715          */
 716         if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
 717             /* This will add an ocf:pacemaker:remote primitive to the
 718              * configuration for the guest node's connection, to be unpacked
 719              * later.
 720              */
 721             new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
 722                                                  scheduler);
 723             if (new_node_id
 724                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 725                 crm_trace("Found guest node %s in resource %s",
 726                           new_node_id, pcmk__xe_id(xml_obj));
 727                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 728                                0, scheduler);
 729             }
 730             continue;
 731         }
 732 
 733         /* Check for guest nodes inside a group. Clones are currently not
 734          * supported as guest nodes.
 735          */
 736         if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
 737             xmlNode *xml_obj2 = NULL;
 738             for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
 739                  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {
 740 
 741                 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
 742                                                      scheduler);
 743 
 744                 if (new_node_id
 745                     && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 746                     crm_trace("Found guest node %s in resource %s inside group %s",
 747                               new_node_id, pcmk__xe_id(xml_obj2),
 748                               pcmk__xe_id(xml_obj));
 749                     pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 750                                    0, scheduler);
 751                 }
 752             }
 753         }
 754     }
 755     return TRUE;
 756 }
 757 
 758 /* Call this after all the nodes and resources have been
 759  * unpacked, but before the status section is read.
 760  *
 761  * A remote node's online status is reflected by the state
 762  * of the remote node's connection resource. We need to link
 763  * the remote node to this connection resource so we can have
 764  * easy access to the connection resource during the scheduler calculations.
 765  */
 766 static void
 767 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
 768 {
 769     pcmk_node_t *remote_node = NULL;
 770 
 771     if (!pcmk_is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
 772         return;
 773     }
 774 
 775     if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
 776         /* remote_nodes and remote_resources are not linked in quick location calculations */
 777         return;
 778     }
 779 
 780     remote_node = pcmk_find_node(scheduler, new_rsc->id);
 781     CRM_CHECK(remote_node != NULL, return);
 782 
 783     pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
 784                     new_rsc->id, pcmk__node_name(remote_node));
 785     remote_node->priv->remote = new_rsc;
 786 
 787     if (new_rsc->priv->launcher == NULL) {
 788         /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
 789          * the same as is done for cluster nodes.
 790          */
 791         handle_startup_fencing(scheduler, remote_node);
 792 
 793     } else {
 794         /* pe_create_node() marks the new node as "remote" or "cluster"; now
 795          * that we know the node is a guest node, update it correctly.
 796          */
 797         pcmk__insert_dup(remote_node->priv->attrs,
 798                          CRM_ATTR_KIND, "container");
 799     }
 800 }
 801 
 802 /*!
 803  * \internal
 804  * \brief Parse configuration XML for resource information
 805  *
 806  * \param[in]     xml_resources  Top of resource configuration XML
 807  * \param[in,out] scheduler      Scheduler data
 808  *
 809  * \return TRUE
 810  *
 811  * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
 812  *       be used when pe__unpack_resource() calls resource_location()
 813  */
 814 gboolean
 815 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 816 {
 817     xmlNode *xml_obj = NULL;
 818     GList *gIter = NULL;
 819 
 820     scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);
 821 
 822     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 823          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
 824 
 825         pcmk_resource_t *new_rsc = NULL;
 826         const char *id = pcmk__xe_id(xml_obj);
 827 
 828         if (pcmk__str_empty(id)) {
 829             pcmk__config_err("Ignoring <%s> resource without ID",
 830                              xml_obj->name);
 831             continue;
 832         }
 833 
 834         if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
 835             if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
 836                                              NULL, NULL) == FALSE) {
  837                 /* Record the template's ID so we know it exists, even if unused. */
 838                 pcmk__insert_dup(scheduler->priv->templates, id, NULL);
 839             }
 840             continue;
 841         }
 842 
 843         crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
 844         if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
 845                                 scheduler) == pcmk_rc_ok) {
 846             scheduler->priv->resources =
 847                 g_list_append(scheduler->priv->resources, new_rsc);
 848             pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
 849 
 850         } else {
 851             pcmk__config_err("Ignoring <%s> resource '%s' "
 852                              "because configuration is invalid",
 853                              xml_obj->name, id);
 854         }
 855     }
 856 
 857     for (gIter = scheduler->priv->resources;
 858          gIter != NULL; gIter = gIter->next) {
 859 
 860         pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
 861 
 862         unpack_launcher(rsc, scheduler);
 863         link_rsc2remotenode(scheduler, rsc);
 864     }
 865 
 866     scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
 867                                              pe__cmp_rsc_priority);
 868     if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
 869         /* Ignore */
 870 
 871     } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
 872                && !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) {
 873 
 874         pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
 875         pcmk__config_err("Either configure some or disable STONITH with the "
 876                          PCMK_OPT_STONITH_ENABLED " option");
 877         pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
 878     }
 879 
 880     return TRUE;
 881 }
 882 
 883 /*!
 884  * \internal
 885  * \brief Validate the levels in a fencing topology
 886  *
 887  * \param[in] xml  \c PCMK_XE_FENCING_TOPOLOGY element
 888  */
 889 void
 890 pcmk__validate_fencing_topology(const xmlNode *xml)
 891 {
 892     if (xml == NULL) {
 893         return;
 894     }
 895 
 896     CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);
 897 
 898     for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
 899                                                      NULL, NULL);
 900          level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {
 901 
 902         const char *id = pcmk__xe_id(level);
 903         int index = 0;
 904 
 905         if (pcmk__str_empty(id)) {
 906             pcmk__config_err("Ignoring fencing level without ID");
 907             continue;
 908         }
 909 
 910         if (crm_element_value_int(level, PCMK_XA_INDEX, &index) != 0) {
 911             pcmk__config_err("Ignoring fencing level %s with invalid index",
 912                              id);
 913             continue;
 914         }
 915 
 916         if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
 917             pcmk__config_err("Ignoring fencing level %s with out-of-range "
 918                              "index %d",
 919                              id, index);
 920         }
 921     }
 922 }
 923 
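      /* For illustration (hypothetical configuration, not taken from this
       * file): a level entry as validated above might look like
       *   <fencing-topology>
       *     <fencing-level id="fl-1" target="node1" index="1" devices="ipmi1"/>
       *   </fencing-topology>
       * where the index must fall within the supported range.
       */
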
 924 gboolean
 925 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
 926 {
 927     xmlNode *xml_tag = NULL;
 928 
 929     scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);
 930 
 931     for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
 932          xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {
 933 
 934         xmlNode *xml_obj_ref = NULL;
 935         const char *tag_id = pcmk__xe_id(xml_tag);
 936 
 937         if (tag_id == NULL) {
 938             pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
 939                              (const char *) xml_tag->name);
 940             continue;
 941         }
 942 
 943         for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
 944                                                 NULL, NULL);
 945              xml_obj_ref != NULL;
 946              xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {
 947 
 948             const char *obj_ref = pcmk__xe_id(xml_obj_ref);
 949 
 950             if (obj_ref == NULL) {
 951                 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
 952                                  xml_obj_ref->name, tag_id);
 953                 continue;
 954             }
 955 
 956             pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
 957         }
 958     }
 959 
 960     return TRUE;
 961 }
 962 
 963 /*!
 964  * \internal
 965  * \brief Unpack a ticket state entry
 966  *
 967  * \param[in]     xml_ticket  XML ticket state to unpack
 968  * \param[in,out] userdata    Scheduler data
 969  *
 970  * \return pcmk_rc_ok (to always continue unpacking further entries)
 971  */
 972 static int
 973 unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
 974 {
 975     pcmk_scheduler_t *scheduler = userdata;
 976 
 977     const char *ticket_id = NULL;
 978     const char *granted = NULL;
 979     const char *last_granted = NULL;
 980     const char *standby = NULL;
 981     xmlAttrPtr xIter = NULL;
 982 
 983     pcmk__ticket_t *ticket = NULL;
 984 
 985     ticket_id = pcmk__xe_id(xml_ticket);
 986     if (pcmk__str_empty(ticket_id)) {
 987         return pcmk_rc_ok;
 988     }
 989 
 990     crm_trace("Processing ticket state for %s", ticket_id);
 991 
 992     ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
 993                                  ticket_id);
 994     if (ticket == NULL) {
 995         ticket = ticket_new(ticket_id, scheduler);
 996         if (ticket == NULL) {
 997             return pcmk_rc_ok;
 998         }
 999     }
1000 
1001     for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1002         const char *prop_name = (const char *)xIter->name;
1003         const char *prop_value = pcmk__xml_attr_value(xIter);
1004 
1005         if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1006             continue;
1007         }
1008         pcmk__insert_dup(ticket->state, prop_name, prop_value);
1009     }
1010 
1011     granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1012     if (granted && crm_is_true(granted)) {
1013         pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
1014         crm_info("We have ticket '%s'", ticket->id);
1015     } else {
1016         pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
1017         crm_info("We do not have ticket '%s'", ticket->id);
1018     }
1019 
1020     last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1021     if (last_granted) {
1022         long long last_granted_ll = 0LL;
1023         int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1024 
1025         if (rc != pcmk_rc_ok) {
1026             crm_warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1027                      " value '%s' in state for ticket %s: %s",
1028                      last_granted_ll, last_granted, ticket->id,
1029                      pcmk_rc_str(rc));
1030         }
1031         ticket->last_granted = (time_t) last_granted_ll;
1032     }
1033 
1034     standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1035     if (standby && crm_is_true(standby)) {
1036         pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
1037         if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) {
1038             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1039         }
1040     } else {
1041         pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
1042     }
1043 
1044     crm_trace("Done with ticket state for %s", ticket_id);
1045 
1046     return pcmk_rc_ok;
1047 }
1048 
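      /* For illustration (hypothetical status entry, not taken from this
       * file): a ticket state element as unpacked above might read
       *   <ticket_state id="ticketA" granted="true" last-granted="1700000000"/>
       * with any other attributes copied verbatim into ticket->state.
       */
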
1049 static void
1050 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1051                            pcmk_scheduler_t *scheduler)
1052 {
1053     const char *discovery = NULL;
1054     const xmlNode *attrs = NULL;
1055     pcmk_resource_t *rsc = NULL;
1056     int maint = 0;
1057 
1058     if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1059         return;
1060     }
1061 
1062     if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1063         return;
1064     }
1065     crm_trace("Processing Pacemaker Remote node %s",
1066               pcmk__node_name(this_node));
1067 
1068     pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
1069                        &maint, 0);
1070     if (maint) {
1071         pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
1072     } else {
1073         pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
1074     }
1075 
1076     rsc = this_node->priv->remote;
1077     if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
1078         this_node->details->unclean = FALSE;
1079         pcmk__set_node_flags(this_node, pcmk__node_seen);
1080     }
1081     attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1082                                  NULL);
1083     add_node_attrs(attrs, this_node, TRUE, scheduler);
1084 
1085     if (pe__shutdown_requested(this_node)) {
1086         crm_info("%s is shutting down", pcmk__node_name(this_node));
1087         this_node->details->shutdown = TRUE;
1088     }
1089 
1090     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1091                                     pcmk__rsc_node_current))) {
1092         crm_info("%s is in standby mode", pcmk__node_name(this_node));
1093         pcmk__set_node_flags(this_node, pcmk__node_standby);
1094     }
1095 
1096     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1097                                     pcmk__rsc_node_current))
1098         || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) {
1099         crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1100         this_node->details->maintenance = TRUE;
1101     }
1102 
1103     discovery = pcmk__node_attr(this_node,
1104                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1105                                 NULL, pcmk__rsc_node_current);
1106     if ((discovery != NULL) && !crm_is_true(discovery)) {
1107         pcmk__warn_once(pcmk__wo_rdisc_enabled,
1108                         "Support for the "
1109                         PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1110                         " node attribute is deprecated and will be removed"
1111                         " (and behave as 'true') in a future release.");
1112 
1113         if (pcmk__is_remote_node(this_node)
1114             && !pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1115             pcmk__config_warn("Ignoring "
1116                               PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1117                               " attribute on Pacemaker Remote node %s"
1118                               " because fencing is disabled",
1119                               pcmk__node_name(this_node));
1120         } else {
1121             /* This is either a remote node with fencing enabled, or a guest
1122              * node. We don't care whether fencing is enabled when fencing guest
1123              * nodes, because they are "fenced" by recovering their containing
1124              * resource.
1125              */
1126             crm_info("%s has resource discovery disabled",
1127                      pcmk__node_name(this_node));
1128             pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
1129         }
1130     }
1131 }
1132 
1133 /*!
1134  * \internal
1135  * \brief Unpack a cluster node's transient attributes
1136  *
1137  * \param[in]     state      CIB node state XML
1138  * \param[in,out] node       Cluster node whose attributes are being unpacked
1139  * \param[in,out] scheduler  Scheduler data
1140  */
1141 static void
1142 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1143                             pcmk_scheduler_t *scheduler)
1144 {
1145     const char *discovery = NULL;
1146     const xmlNode *attrs = pcmk__xe_first_child(state,
1147                                                 PCMK__XE_TRANSIENT_ATTRIBUTES,
1148                                                 NULL, NULL);
1149 
1150     add_node_attrs(attrs, node, TRUE, scheduler);
1151 
1152     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1153                                     pcmk__rsc_node_current))) {
1154         crm_info("%s is in standby mode", pcmk__node_name(node));
1155         pcmk__set_node_flags(node, pcmk__node_standby);
1156     }
1157 
1158     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1159                                     pcmk__rsc_node_current))) {
1160         crm_info("%s is in maintenance mode", pcmk__node_name(node));
1161         node->details->maintenance = TRUE;
1162     }
1163 
1164     discovery = pcmk__node_attr(node,
1165                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1166                                 NULL, pcmk__rsc_node_current);
1167     if ((discovery != NULL) && !crm_is_true(discovery)) {
1168         pcmk__config_warn("Ignoring "
1169                           PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1170                           " attribute for %s because disabling resource"
1171                           " discovery is not allowed for cluster nodes",
1172                           pcmk__node_name(node));
1173     }
1174 }
1175 
1176 /*!
1177  * \internal
1178  * \brief Unpack a node state entry (first pass)
1179  *
1180  * Unpack one node state entry from status. This unpacks information from the
 1181  * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
1182  * the resource history inside it. Multiple passes through the status are needed
1183  * to fully unpack everything.
1184  *
1185  * \param[in]     state      CIB node state XML
1186  * \param[in,out] scheduler  Scheduler data
1187  */
1188 static void
1189 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1190 {
1191     const char *id = NULL;
1192     const char *uname = NULL;
1193     pcmk_node_t *this_node = NULL;
1194 
1195     id = crm_element_value(state, PCMK_XA_ID);
1196     if (id == NULL) {
1197         pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1198                          PCMK_XA_ID);
1199         crm_log_xml_info(state, "missing-id");
1200         return;
1201     }
1202 
1203     uname = crm_element_value(state, PCMK_XA_UNAME);
1204     if (uname == NULL) {
1205         /* If a joining peer makes the cluster acquire the quorum from Corosync
1206          * but has not joined the controller CPG membership yet, it's possible
1207          * that the created PCMK__XE_NODE_STATE entry doesn't have a
1208          * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
1209          * join CPG.
1210          */
1211         crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1212                   "without " PCMK_XA_UNAME,
1213                   id);
1214     }
1215 
1216     this_node = pe_find_node_any(scheduler->nodes, id, uname);
1217     if (this_node == NULL) {
1218         crm_notice("Ignoring recorded state for removed node with name %s and "
1219                    PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1220         return;
1221     }
1222 
1223     if (pcmk__is_pacemaker_remote_node(this_node)) {
1224         int remote_fenced = 0;
1225 
1226         /* We can't determine the online status of Pacemaker Remote nodes until
1227          * after all resource history has been unpacked. In this first pass, we
1228          * do need to mark whether the node has been fenced, as this plays a
1229          * role during unpacking cluster node resource state.
1230          */
1231         pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
1232                            &remote_fenced, 0);
1233         if (remote_fenced) {
1234             pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
1235         } else {
1236             pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
1237         }
1238         return;
1239     }
1240 
1241     unpack_transient_attributes(state, this_node, scheduler);
1242 
1243     /* Provisionally mark this cluster node as clean. We have at least seen it
1244      * in the current cluster's lifetime.
1245      */
1246     this_node->details->unclean = FALSE;
1247     pcmk__set_node_flags(this_node, pcmk__node_seen);
1248 
1249     crm_trace("Determining online status of cluster node %s (id %s)",
1250               pcmk__node_name(this_node), id);
1251     determine_online_status(state, this_node, scheduler);
1252 
1253     if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)
1254         && this_node->details->online
1255         && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1256         /* Everything else should flow from this automatically
1257          * (at least until the scheduler becomes able to migrate off
1258          * healthy resources)
1259          */
1260         pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1261                       FALSE);
1262     }
1263 }
1264 
1265 /*!
1266  * \internal
1267  * \brief Unpack nodes' resource history as much as possible
1268  *
1269  * Unpack as many nodes' resource history as possible in one pass through the
1270  * status. We need to process Pacemaker Remote nodes' connections/containers
1271  * before unpacking their history; the connection/container history will be
1272  * in another node's history, so it might take multiple passes to unpack
1273  * everything.
1274  *
1275  * \param[in]     status     CIB XML status section
1276  * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
1277  * \param[in,out] scheduler  Scheduler data
1278  *
1279  * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1280  *         or EAGAIN if more unpacking remains to be done)
1281  */
1282 static int
1283 unpack_node_history(const xmlNode *status, bool fence,
1284                     pcmk_scheduler_t *scheduler)
1285 {
1286     int rc = pcmk_rc_ok;
1287 
1288     // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1289     for (const xmlNode *state = pcmk__xe_first_child(status,
1290                                                      PCMK__XE_NODE_STATE, NULL,
1291                                                      NULL);
1292          state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {
1293 
1294         const char *id = pcmk__xe_id(state);
1295         const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1296         pcmk_node_t *this_node = NULL;
1297 
1298         if ((id == NULL) || (uname == NULL)) {
1299             // Warning already logged in first pass through status section
1300             crm_trace("Not unpacking resource history from malformed "
1301                       PCMK__XE_NODE_STATE " without id and/or uname");
1302             continue;
1303         }
1304 
1305         this_node = pe_find_node_any(scheduler->nodes, id, uname);
1306         if (this_node == NULL) {
1307             // Warning already logged in first pass through status section
1308             crm_trace("Not unpacking resource history for node %s because "
1309                       "no longer in configuration", id);
1310             continue;
1311         }
1312 
1313         if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1314             crm_trace("Not unpacking resource history for node %s because "
1315                       "already unpacked", id);
1316             continue;
1317         }
1318 
1319         if (fence) {
1320             // We're processing all remaining nodes
1321 
1322         } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1323             /* We can unpack a guest node's history only after we've unpacked
1324              * other resource history to the point that we know that the node's
1325              * connection and containing resource are both up.
1326              */
1327             const pcmk_resource_t *remote = this_node->priv->remote;
1328             const pcmk_resource_t *launcher = remote->priv->launcher;
1329 
1330             if ((remote->priv->orig_role != pcmk_role_started)
1331                 || (launcher->priv->orig_role != pcmk_role_started)) {
1332                 crm_trace("Not unpacking resource history for guest node %s "
1333                           "because launcher and connection are not known to "
1334                           "be up", id);
1335                 continue;
1336             }
1337 
1338         } else if (pcmk__is_remote_node(this_node)) {
1339             /* We can unpack a remote node's history only after we've unpacked
1340              * other resource history to the point that we know that the node's
1341              * connection is up, with the exception of when shutdown locks are
1342              * in use.
1343              */
1344             pcmk_resource_t *rsc = this_node->priv->remote;
1345 
1346             if ((rsc == NULL)
1347                 || (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1348                     && (rsc->priv->orig_role != pcmk_role_started))) {
1349                 crm_trace("Not unpacking resource history for remote node %s "
1350                           "because connection is not known to be up", id);
1351                 continue;
1352             }
1353 
1354         /* If fencing and shutdown locks are disabled and we're not processing
1355          * unseen nodes, then we don't want to unpack offline nodes until online
1356          * nodes have been unpacked. This allows us to number active clone
1357          * instances first.
1358          */
1359         } else if (!pcmk_any_flags_set(scheduler->flags,
1360                                        pcmk__sched_fencing_enabled
1361                                        |pcmk__sched_shutdown_lock)
1362                    && !this_node->details->online) {
1363             crm_trace("Not unpacking resource history for offline "
1364                       "cluster node %s", id);
1365             continue;
1366         }
1367 
1368         if (pcmk__is_pacemaker_remote_node(this_node)) {
1369             determine_remote_online_status(scheduler, this_node);
1370             unpack_handle_remote_attrs(this_node, state, scheduler);
1371         }
1372 
1373         crm_trace("Unpacking resource history for %snode %s",
1374                   (fence? "unseen " : ""), id);
1375 
1376         pcmk__set_node_flags(this_node, pcmk__node_unpacked);
1377         unpack_node_lrm(this_node, state, scheduler);
1378 
1379         rc = EAGAIN; // Other node histories might depend on this one
1380     }
1381     return rc;
1382 }
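
/* Illustrative caller pattern (annotation, not part of the original source; it
 * mirrors the loop in unpack_status() below): keep calling until no more
 * progress is reported, because a node skipped in one pass may become
 * unpackable once another node's history (holding its remote connection or
 * container) has been read:
 *
 *     while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
 *         crm_trace("Another pass through node resource histories is needed");
 *     }
 */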
1383 
1384 /* Unpack the CIB status section: determine each node's state, then unpack the
1385  * nodes' resource operation histories to learn where resources are active and
1386  * what recovery, if any, they need */
1387 gboolean
1388 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1389 {
1390     xmlNode *state = NULL;
1391 
1392     crm_trace("Beginning unpack");
1393 
1394     if (scheduler->priv->ticket_constraints == NULL) {
1395         scheduler->priv->ticket_constraints =
1396             pcmk__strkey_table(free, destroy_ticket);
1397     }
1398 
1399     for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1400          state = pcmk__xe_next(state, NULL)) {
1401 
1402         if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1403             pcmk__xe_foreach_child(state, PCMK__XE_TICKET_STATE,
1404                                    unpack_ticket_state, scheduler);
1405 
1406         } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1407             unpack_node_state(state, scheduler);
1408         }
1409     }
1410 
1411     while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1412         crm_trace("Another pass through node resource histories is needed");
1413     }
1414 
1415     // Now catch any nodes we didn't see
1416     unpack_node_history(status,
1417                         pcmk_is_set(scheduler->flags,
1418                                     pcmk__sched_fencing_enabled),
1419                         scheduler);
1420 
1421     /* Now that we know where resources are, we can schedule stops of containers
1422      * with failed bundle connections
1423      */
1424     if (scheduler->priv->stop_needed != NULL) {
1425         for (GList *item = scheduler->priv->stop_needed;
1426              item != NULL; item = item->next) {
1427 
1428             pcmk_resource_t *container = item->data;
1429             pcmk_node_t *node = pcmk__current_node(container);
1430 
1431             if (node) {
1432                 stop_action(container, node, FALSE);
1433             }
1434         }
1435         g_list_free(scheduler->priv->stop_needed);
1436         scheduler->priv->stop_needed = NULL;
1437     }
1438 
1439     /* Now that we know status of all Pacemaker Remote connections and nodes,
1440      * we can stop connections for node shutdowns, and check the online status
1441      * of remote/guest nodes that didn't have any node history to unpack.
1442      */
1443     for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1444         pcmk_node_t *this_node = gIter->data;
1445 
1446         if (!pcmk__is_pacemaker_remote_node(this_node)) {
1447             continue;
1448         }
1449         if (this_node->details->shutdown
1450             && (this_node->priv->remote != NULL)) {
1451             pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
1452                               "remote shutdown");
1453         }
1454         if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1455             determine_remote_online_status(scheduler, this_node);
1456         }
1457     }
1458 
1459     return TRUE;
1460 }
1461 
1462 /*!
1463  * \internal
1464  * \brief Unpack node's time when it became a member at the cluster layer
1465  *
1466  * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
1467  * \param[in,out] scheduler   Scheduler data
1468  *
1469  * \return Epoch time when node became a cluster member
1470  *         (or scheduler effective time for legacy entries) if a member,
1471  *         0 if not a member, or -1 if no valid information available
1472  */
1473 static long long
1474 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1475 {
1476     const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1477     int member = 0;
1478 
1479     if (member_time == NULL) {
1480         return -1LL;
1481 
1482     } else if (crm_str_to_boolean(member_time, &member) == 1) {
1483         /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1484          * recorded as a boolean for a DC < 2.1.7, or the node is pending
1485          * shutdown and has left the CPG, in which case it was set to 1 to avoid
1486          * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1487          *
1488          * We return the effective time for in_ccm=1 because what's important to
1489          * avoid fencing is that effective time minus this value is less than
1490          * the pending node timeout.
1491          */
1492         return member? (long long) get_effective_time(scheduler) : 0LL;
1493 
1494     } else {
1495         long long when_member = 0LL;
1496 
1497         if ((pcmk__scan_ll(member_time, &when_member,
1498                            0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1499             crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1500                      " in " PCMK__XE_NODE_STATE " entry", member_time);
1501             return -1LL;
1502         }
1503         return when_member;
1504     }
1505 }
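
/* Illustrative value mapping (annotation, not part of the original source; the
 * timestamp is a made-up example):
 *
 *   PCMK__XA_IN_CCM="1700000000"  -> 1700000000 (member since that epoch time)
 *   PCMK__XA_IN_CCM="true"        -> scheduler effective time (legacy entry)
 *   PCMK__XA_IN_CCM="false" / "0" -> 0 (not a member)
 *   attribute missing or invalid  -> -1
 */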
1506 
1507 /*!
1508  * \internal
1509  * \brief Unpack node's time when it became online in process group
1510  *
1511  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry
1512  *
1513  * \return Epoch time when node became online in process group (or 0 if not
1514  *         online, or 1 for legacy online entries)
1515  */
1516 static long long
1517 unpack_node_online(const xmlNode *node_state)
1518 {
1519     const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1520 
1521     // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1522     if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1523                      pcmk__str_casei|pcmk__str_null_matches)) {
1524         return 0LL;
1525 
1526     } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1527         return 1LL;
1528 
1529     } else {
1530         long long when_online = 0LL;
1531 
1532         if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1533             || (when_online < 0)) {
1534             crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1535                      PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1536             return 0LL;
1537         }
1538         return when_online;
1539     }
1540 }
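
/* Illustrative value mapping (annotation, not part of the original source; the
 * timestamp is a made-up example):
 *
 *   PCMK_XA_CRMD="1700000000"        -> 1700000000 (online since that time)
 *   PCMK_XA_CRMD="online"            -> 1 (legacy entry)
 *   PCMK_XA_CRMD="offline" / missing -> 0
 *   unrecognized value               -> 0 (with a warning, assumed offline)
 */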
1541 
1542 /*!
1543  * \internal
1544  * \brief Unpack node attribute for user-requested fencing
1545  *
1546  * \param[in] node        Node to check
1547  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry in CIB status
1548  *
1549  * \return \c true if fencing has been requested for \p node, otherwise \c false
1550  */
1551 static bool
1552 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1553 {
1554     long long value = 0LL;
1555     int value_i = 0;
1556     int rc = pcmk_rc_ok;
1557     const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1558                                           NULL, pcmk__rsc_node_current);
1559 
1560     // Value may be boolean or an epoch time
1561     if (crm_str_to_boolean(value_s, &value_i) == 1) {
1562         return (value_i != 0);
1563     }
1564     rc = pcmk__scan_ll(value_s, &value, 0LL);
1565     if (rc == pcmk_rc_ok) {
1566         return (value > 0);
1567     }
1568     crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1569              " node attribute for %s: %s",
1570              value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1571     return false;
1572 }
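
/* Illustrative value mapping for the PCMK_NODE_ATTR_TERMINATE attribute
 * (annotation, not part of the original source):
 *
 *   "true" / "yes" / "1"            -> true  (fencing requested)
 *   a positive epoch timestamp      -> true  (fencing requested at that time)
 *   "false" / "0" / attribute unset -> false
 */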
1573 
1574 static gboolean
1575 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1576                                    const xmlNode *node_state,
1577                                    pcmk_node_t *this_node)
1578 {
1579     gboolean online = FALSE;
1580     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1581     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1582     long long when_member = unpack_node_member(node_state, scheduler);
1583     long long when_online = unpack_node_online(node_state);
1584 
1585     if (when_member <= 0) {
1586         crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1587                   ((when_member < 0)? "presumed " : ""));
1588 
1589     } else if (when_online > 0) {
1590         if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1591             online = TRUE;
1592         } else {
1593             crm_debug("Node %s is not ready to run resources: %s",
1594                       pcmk__node_name(this_node), join);
1595         }
1596 
1597     } else if (!pcmk_is_set(this_node->priv->flags,
1598                             pcmk__node_expected_up)) {
1599         crm_trace("Node %s controller is down: "
1600                   "member@%lld online@%lld join=%s expected=%s",
1601                   pcmk__node_name(this_node), when_member, when_online,
1602                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1603 
1604     } else {
1605         /* mark it unclean */
1606         pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1607         crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1608                  pcmk__node_name(this_node), when_member, when_online,
1609                  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1610     }
1611     return online;
1612 }
1613 
1614 /*!
1615  * \internal
1616  * \brief Check whether a node has taken too long to join controller group
1617  *
1618  * \param[in,out] scheduler    Scheduler data
1619  * \param[in]     node         Node to check
1620  * \param[in]     when_member  Epoch time when node became a cluster member
1621  * \param[in]     when_online  Epoch time when node joined controller group
1622  *
1623  * \return true if node has been pending (on the way up) longer than
1624  *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1625  * \note This will also update the cluster's recheck time if appropriate.
1626  */
1627 static inline bool
1628 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1629                  long long when_member, long long when_online)
1630 {
1631     if ((scheduler->priv->node_pending_ms > 0U)
1632         && (when_member > 0) && (when_online <= 0)) {
1633         // There is a timeout on pending nodes, and node is pending
1634 
1635         time_t timeout = when_member
1636                          + pcmk__timeout_ms2s(scheduler->priv->node_pending_ms);
1637 
1638         if (get_effective_time(node->priv->scheduler) >= timeout) {
1639             return true; // Node has timed out
1640         }
1641 
1642         // Node is pending, but still has time
1643         pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1644     }
1645     return false;
1646 }
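
/* Worked example (annotation, not part of the original source; all values
 * assumed): with PCMK_OPT_NODE_PENDING_TIMEOUT set to 60s, a node with
 * when_member=1000 and when_online=0 times out once the effective time reaches
 * 1060 (returning true); before that, the cluster recheck time is updated to
 * 1060 so the scheduler re-evaluates the pending node when the timeout expires.
 */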
1647 
1648 static bool
1649 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1650                                 const xmlNode *node_state,
1651                                 pcmk_node_t *this_node)
1652 {
1653     bool termination_requested = unpack_node_terminate(this_node, node_state);
1654     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1655     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1656     long long when_member = unpack_node_member(node_state, scheduler);
1657     long long when_online = unpack_node_online(node_state);
1658 
1659 /*
1660   - PCMK__XA_JOIN          ::= member|down|pending|banned
1661   - PCMK_XA_EXPECTED       ::= member|down
1662 
1663   @COMPAT with entries recorded for DCs < 2.1.7
1664   - PCMK__XA_IN_CCM        ::= true|false
1665   - PCMK_XA_CRMD           ::= online|offline
1666 
1667   Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1668   - PCMK__XA_IN_CCM        ::= <timestamp>|0
1669   Since when the node has been a cluster member. A value of 0 means the node
1670   is not a cluster member.
1671 
1672   - PCMK_XA_CRMD           ::= <timestamp>|0
1673   Since when peer has been online in CPG. A value 0 means the peer is offline
1674   in CPG.
1675 */
1676 
1677     crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1678               pcmk__node_name(this_node), when_member, when_online,
1679               pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1680               (termination_requested? " (termination requested)" : ""));
1681 
1682     if (this_node->details->shutdown) {
1683         crm_debug("%s is shutting down", pcmk__node_name(this_node));
1684 
1685         /* Slightly different criteria since we can't shut down a dead peer */
1686         return (when_online > 0);
1687     }
1688 
1689     if (when_member < 0) {
1690         pe_fence_node(scheduler, this_node,
1691                       "peer has not been seen by the cluster", FALSE);
1692         return false;
1693     }
1694 
1695     if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1696         pe_fence_node(scheduler, this_node,
1697                       "peer failed Pacemaker membership criteria", FALSE);
1698 
1699     } else if (termination_requested) {
1700         if ((when_member <= 0) && (when_online <= 0)
1701             && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1702             crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1703             return false;
1704         }
1705         pe_fence_node(scheduler, this_node, "fencing was requested", false);
1706 
1707     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1708                             pcmk__str_null_matches)) {
1709 
1710         if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1711             pe_fence_node(scheduler, this_node,
1712                           "peer pending timed out on joining the process group",
1713                           FALSE);
1714 
1715         } else if ((when_member > 0) || (when_online > 0)) {
1716             crm_info("- %s is not ready to run resources",
1717                      pcmk__node_name(this_node));
1718             pcmk__set_node_flags(this_node, pcmk__node_standby);
1719             this_node->details->pending = TRUE;
1720 
1721         } else {
1722             crm_trace("%s is down or still coming up",
1723                       pcmk__node_name(this_node));
1724         }
1725 
1726     } else if (when_member <= 0) {
1727         // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1728         pe_fence_node(scheduler, this_node,
1729                       "peer is no longer part of the cluster", TRUE);
1730 
1731     } else if (when_online <= 0) {
1732         pe_fence_node(scheduler, this_node,
1733                       "peer process is no longer available", FALSE);
1734 
1735         /* Everything is running at this point, now check join state */
1736 
1737     } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1738         crm_info("%s is active", pcmk__node_name(this_node));
1739 
1740     } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1741                                 CRMD_JOINSTATE_DOWN, NULL)) {
1742         crm_info("%s is not ready to run resources",
1743                  pcmk__node_name(this_node));
1744         pcmk__set_node_flags(this_node, pcmk__node_standby);
1745         this_node->details->pending = TRUE;
1746 
1747     } else {
1748         pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1749                       FALSE);
1750     }
1751 
1752     return (when_member > 0);
1753 }
1754 
1755 static void
1756 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1757                                pcmk_node_t *this_node)
1758 {
1759     pcmk_resource_t *rsc = this_node->priv->remote;
1760     pcmk_resource_t *launcher = NULL;
1761     pcmk_node_t *host = NULL;
1762     const char *node_type = "Remote";
1763 
1764     if (rsc == NULL) {
1765         /* This is a leftover node state entry for a former Pacemaker Remote
1766          * node whose connection resource was removed. Consider it offline.
1767          */
1768         crm_trace("Pacemaker Remote node %s is considered OFFLINE because "
1769                   "its connection resource has been removed from the CIB",
1770                   this_node->priv->id);
1771         this_node->details->online = FALSE;
1772         return;
1773     }
1774 
1775     launcher = rsc->priv->launcher;
1776     if (launcher != NULL) {
1777         node_type = "Guest";
1778         if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1779             host = rsc->priv->active_nodes->data;
1780         }
1781     }
1782 
1783     /* If the resource is currently started, mark it online. */
1784     if (rsc->priv->orig_role == pcmk_role_started) {
1785         this_node->details->online = TRUE;
1786     }
1787 
1788     /* consider this node shutting down if transitioning start->stop */
1789     if ((rsc->priv->orig_role == pcmk_role_started)
1790         && (rsc->priv->next_role == pcmk_role_stopped)) {
1791 
1792         crm_trace("%s node %s shutting down because connection resource is stopping",
1793                   node_type, this_node->priv->id);
1794         this_node->details->shutdown = TRUE;
1795     }
1796 
1797     /* Now check all the failure conditions. */
1798     if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) {
1799         crm_trace("Guest node %s UNCLEAN because guest resource failed",
1800                   this_node->priv->id);
1801         this_node->details->online = FALSE;
1802         pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1803 
1804     } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
1805         crm_trace("%s node %s OFFLINE because connection resource failed",
1806                   node_type, this_node->priv->id);
1807         this_node->details->online = FALSE;
1808 
1809     } else if ((rsc->priv->orig_role == pcmk_role_stopped)
1810                || ((launcher != NULL)
1811                    && (launcher->priv->orig_role == pcmk_role_stopped))) {
1812 
1813         crm_trace("%s node %s OFFLINE because its resource is stopped",
1814                   node_type, this_node->priv->id);
1815         this_node->details->online = FALSE;
1816         pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);
1817 
1818     } else if (host && (host->details->online == FALSE)
1819                && host->details->unclean) {
1820         crm_trace("Guest node %s UNCLEAN because host is unclean",
1821                   this_node->priv->id);
1822         this_node->details->online = FALSE;
1823         pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1824 
1825     } else {
1826         crm_trace("%s node %s is %s",
1827                   node_type, this_node->priv->id,
1828                   this_node->details->online? "ONLINE" : "OFFLINE");
1829     }
1830 }
1831 
1832 static void
1833 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1834                         pcmk_scheduler_t *scheduler)
1835 {
1836     gboolean online = FALSE;
1837     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1838 
1839     CRM_CHECK(this_node != NULL, return);
1840 
1841     this_node->details->shutdown = FALSE;
1842 
1843     if (pe__shutdown_requested(this_node)) {
1844         this_node->details->shutdown = TRUE;
1845 
1846     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1847         pcmk__set_node_flags(this_node, pcmk__node_expected_up);
1848     }
1849 
1850     if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1851         online = determine_online_status_no_fencing(scheduler, node_state,
1852                                                     this_node);
1853 
1854     } else {
1855         online = determine_online_status_fencing(scheduler, node_state,
1856                                                  this_node);
1857     }
1858 
1859     if (online) {
1860         this_node->details->online = TRUE;
1861 
1862     } else {
1863         /* remove node from contention */
1864         this_node->assign->score = -PCMK_SCORE_INFINITY;
1865     }
1866 
1867     if (online && this_node->details->shutdown) {
1868         /* don't run resources here */
1869         this_node->assign->score = -PCMK_SCORE_INFINITY;
1870     }
1871 
1872     if (this_node->details->unclean) {
1873         pcmk__sched_warn(scheduler, "%s is unclean",
1874                          pcmk__node_name(this_node));
1875 
1876     } else if (!this_node->details->online) {
1877         crm_trace("%s is offline", pcmk__node_name(this_node));
1878 
1879     } else if (this_node->details->shutdown) {
1880         crm_info("%s is shutting down", pcmk__node_name(this_node));
1881 
1882     } else if (this_node->details->pending) {
1883         crm_info("%s is pending", pcmk__node_name(this_node));
1884 
1885     } else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) {
1886         crm_info("%s is in standby", pcmk__node_name(this_node));
1887 
1888     } else if (this_node->details->maintenance) {
1889         crm_info("%s is in maintenance", pcmk__node_name(this_node));
1890 
1891     } else {
1892         crm_info("%s is online", pcmk__node_name(this_node));
1893     }
1894 }
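
/* Illustrative outcomes (annotation, not part of the original source): a node
 * that is offline, or online but shutting down, is assigned a score of
 * -PCMK_SCORE_INFINITY so no resources are placed on it; an online node that is
 * pending, in standby, or in maintenance is only logged as such here, and this
 * function leaves its score unchanged.
 */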
1895 
1896 /*!
1897  * \internal
1898  * \brief Find the end of a resource's name, excluding any clone suffix
1899  *
1900  * \param[in] id  Resource ID to check
1901  *
1902  * \return Pointer to last character of resource's base name
1903  */
1904 const char *
1905 pe_base_name_end(const char *id)
1906 {
1907     if (!pcmk__str_empty(id)) {
1908         const char *end = id + strlen(id) - 1;
1909 
1910         for (const char *s = end; s > id; --s) {
1911             switch (*s) {
1912                 case '0':
1913                 case '1':
1914                 case '2':
1915                 case '3':
1916                 case '4':
1917                 case '5':
1918                 case '6':
1919                 case '7':
1920                 case '8':
1921                 case '9':
1922                     break;
1923                 case ':':
1924                     return (s == end)? s : (s - 1);
1925                 default:
1926                     return end;
1927             }
1928         }
1929         return end;
1930     }
1931     return NULL;
1932 }
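
/* Illustrative results (annotation, not part of the original source):
 *
 *   pe_base_name_end("myclone:10") -> pointer to the final 'e' of "myclone"
 *   pe_base_name_end("myclone")    -> pointer to the final 'e' (no suffix)
 *   pe_base_name_end("")           -> NULL
 */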
1933 
1934 /*!
1935  * \internal
1936  * \brief Get a resource name excluding any clone suffix
1937  *
1938  * \param[in] last_rsc_id  Resource ID to check
1939  *
1940  * \return Pointer to newly allocated string with resource's base name
1941  * \note It is the caller's responsibility to free() the result.
1942  *       This asserts on error, so callers can assume result is not NULL.
1943  */
1944 char *
1945 clone_strip(const char *last_rsc_id)
1946 {
1947     const char *end = pe_base_name_end(last_rsc_id);
1948     char *basename = NULL;
1949 
1950     pcmk__assert(end != NULL);
1951     basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1952     pcmk__assert(basename != NULL);
1953     return basename;
1954 }
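
/* Illustrative result (annotation, not part of the original source):
 * clone_strip("myclone:10") returns a newly allocated "myclone", which the
 * caller must free().
 */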
1955 
1956 /*!
1957  * \internal
1958  * \brief Get the name of the first instance of a cloned resource
1959  *
1960  * \param[in] last_rsc_id  Resource ID to check
1961  *
1962  * \return Pointer to newly allocated string with resource's base name plus :0
1963  * \note It is the caller's responsibility to free() the result.
1964  *       This asserts on error, so callers can assume result is not NULL.
1965  */
1966 char *
1967 clone_zero(const char *last_rsc_id)
1968 {
1969     const char *end = pe_base_name_end(last_rsc_id);
1970     size_t base_name_len = end - last_rsc_id + 1;
1971     char *zero = NULL;
1972 
1973     pcmk__assert(end != NULL);
1974     zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
1975     memcpy(zero, last_rsc_id, base_name_len);
1976     zero[base_name_len] = ':';
1977     zero[base_name_len + 1] = '0';
1978     return zero;
1979 }
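
/* Illustrative results (annotation, not part of the original source): both
 * clone_zero("myclone:10") and clone_zero("myclone") return a newly allocated
 * "myclone:0", which the caller must free().
 */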
1980 
1981 static pcmk_resource_t *
1982 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1983                      pcmk_scheduler_t *scheduler)
1984 {
1985     pcmk_resource_t *rsc = NULL;
1986     xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1987 
1988     pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1989     crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
1990     crm_log_xml_debug(xml_rsc, "Orphan resource");
1991 
1992     if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
1993         return NULL;
1994     }
1995 
1996     if (xml_contains_remote_node(xml_rsc)) {
1997         pcmk_node_t *node;
1998 
1999         crm_debug("Detected orphaned remote node %s", rsc_id);
2000         node = pcmk_find_node(scheduler, rsc_id);
2001         if (node == NULL) {
2002             node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2003                                   scheduler);
2004         }
2005         link_rsc2remotenode(scheduler, rsc);
2006 
2007         if (node) {
2008             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2009             node->details->shutdown = TRUE;
2010         }
2011     }
2012 
2013     if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2014         // This removed resource needs to be mapped to a launcher
2015         crm_trace("Launched resource %s was removed from the configuration",
2016                   rsc_id);
2017         pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
2018     }
2019     pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
2020     scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
2021     return rsc;
2022 }
2023 
2024 /*!
2025  * \internal
2026  * \brief Create orphan instance for anonymous clone resource history
2027  *
2028  * \param[in,out] parent     Clone resource that orphan will be added to
2029  * \param[in]     rsc_id     Orphan's resource ID
2030  * \param[in]     node       Where orphan is active (for logging only)
2031  * \param[in,out] scheduler  Scheduler data
2032  *
2033  * \return Newly added orphaned instance of \p parent
2034  */
2035 static pcmk_resource_t *
2036 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2037                         const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2038 {
2039     pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2040     pcmk_resource_t *orphan = NULL;
2041 
2042     // find_rsc() because we might be a cloned group
2043     orphan = top->priv->fns->find_rsc(top, rsc_id, NULL,
2044                                       pcmk_rsc_match_clone_only);
2045 
2046     pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2047                     top->id, parent->id, rsc_id, pcmk__node_name(node));
2048     return orphan;
2049 }
2050 
2051 /*!
2052  * \internal
2053  * \brief Check a node for an instance of an anonymous clone
2054  *
2055  * Return a child instance of the specified anonymous clone, in order of
2056  * preference: (1) the instance running on the specified node, if any;
2057  * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
2058  * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX
2059  * instances are already active).
2060  *
2061  * \param[in,out] scheduler  Scheduler data
2062  * \param[in]     node       Node on which to check for instance
2063  * \param[in,out] parent     Clone to check
2064  * \param[in]     rsc_id     Name of cloned resource in history (no instance)
2065  */
2066 static pcmk_resource_t *
2067 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2068                      pcmk_resource_t *parent, const char *rsc_id)
2069 {
2070     GList *rIter = NULL;
2071     pcmk_resource_t *rsc = NULL;
2072     pcmk_resource_t *inactive_instance = NULL;
2073     gboolean skip_inactive = FALSE;
2074 
2075     pcmk__assert(pcmk__is_anonymous_clone(parent));
2076 
2077     // Check for active (or partially active, for cloned groups) instance
2078     pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2079                     rsc_id, pcmk__node_name(node), parent->id);
2080 
2081     for (rIter = parent->priv->children;
2082          (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
2083 
2084         GList *locations = NULL;
2085         pcmk_resource_t *child = rIter->data;
2086 
2087         /* Check whether this instance is already known to be active or pending
2088          * anywhere, at this stage of unpacking. Because this function is called
2089          * for a resource before the resource's individual operation history
2090          * entries are unpacked, locations will generally not contain the
2091          * desired node.
2092          *
2093          * However, there are three exceptions:
2094          * (1) when child is a cloned group and we have already unpacked the
2095          *     history of another member of the group on the same node;
2096          * (2) when we've already unpacked the history of another numbered
2097          *     instance on the same node (which can happen if
2098          *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2099          * (3) when we re-run calculations on the same scheduler data as part of
2100          *     a simulation.
2101          */
2102         child->priv->fns->location(child, &locations, pcmk__rsc_node_current
2103                                                       |pcmk__rsc_node_pending);
2104         if (locations) {
2105             /* We should never associate the same numbered anonymous clone
2106              * instance with multiple nodes, and clone instances can't migrate,
2107              * so there must be only one location, regardless of history.
2108              */
2109             CRM_LOG_ASSERT(locations->next == NULL);
2110 
2111             if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2112                 /* This child instance is active on the requested node, so check
2113                  * for a corresponding configured resource. We use find_rsc()
2114                  * instead of child because child may be a cloned group, and we
2115                  * need the particular member corresponding to rsc_id.
2116                  *
2117                  * If the history entry is orphaned, rsc will be NULL.
2118                  */
2119                 rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
2120                                                   pcmk_rsc_match_clone_only);
2121                 if (rsc) {
2122                     /* If there are multiple instance history entries for an
2123                      * anonymous clone in a single node's history (which can
2124                      * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2125                      * to false), we want to consider the instances beyond the
2126                      * first as orphans, even if there are inactive instance
2127                      * numbers available.
2128                      */
2129                     if (rsc->priv->active_nodes != NULL) {
2130                         crm_notice("Active (now-)anonymous clone %s has "
2131                                    "multiple (orphan) instance histories on %s",
2132                                    parent->id, pcmk__node_name(node));
2133                         skip_inactive = TRUE;
2134                         rsc = NULL;
2135                     } else {
2136                         pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2137                     }
2138                 }
2139             }
2140             g_list_free(locations);
2141 
2142         } else {
2143             pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2144             if (!skip_inactive && !inactive_instance
2145                 && !pcmk_is_set(child->flags, pcmk__rsc_blocked)) {
2146                 // Remember one inactive instance in case we don't find active
2147                 inactive_instance =
2148                     parent->priv->fns->find_rsc(child, rsc_id, NULL,
2149                                                 pcmk_rsc_match_clone_only);
2150 
2151                 /* ... but don't use it if it was already associated with a
2152                  * pending action on another node
2153                  */
2154                 if (inactive_instance != NULL) {
2155                     const pcmk_node_t *pending_node = NULL;
2156 
2157                     pending_node = inactive_instance->priv->pending_node;
2158                     if ((pending_node != NULL)
2159                         && !pcmk__same_node(pending_node, node)) {
2160                         inactive_instance = NULL;
2161                     }
2162                 }
2163             }
2164         }
2165     }
2166 
2167     if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2168         pcmk__rsc_trace(parent, "Resource %s, empty slot",
2169                         inactive_instance->id);
2170         rsc = inactive_instance;
2171     }
2172 
2173     /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2174      * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2175      * don't want to consume a valid instance number for unclean nodes. Such
2176      * instances may appear to be active according to the history, but should be
2177      * considered inactive, so we can start an instance elsewhere. Treat such
2178      * instances as orphans.
2179      *
2180      * An exception is instances running on guest nodes -- since guest node
2181      * "fencing" is actually just a resource stop, requires shouldn't apply.
2182      *
2183      * @TODO Ideally, we'd use an inactive instance number if it is not needed
2184      * for any clean instances. However, we don't know that at this point.
2185      */
2186     if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)
2187         && (!node->details->online || node->details->unclean)
2188         && !pcmk__is_guest_or_bundle_node(node)
2189         && !pe__is_universal_clone(parent, scheduler)) {
2190 
2191         rsc = NULL;
2192     }
2193 
2194     if (rsc == NULL) {
2195         rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2196         pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2197     }
2198     return rsc;
2199 }
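
/* Illustrative search order (annotation, not part of the original source): for
 * an anonymous clone "c" with two instances, a history entry for "c" on node1
 * matches (1) the instance already known active or pending on node1, if any;
 * otherwise (2) an unused instance such as c:1 that is not blocked or pending
 * elsewhere; otherwise (3) a freshly created orphan instance.
 */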
2200 
2201 static pcmk_resource_t *
2202 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2203                      const char *rsc_id)
2204 {
2205     pcmk_resource_t *rsc = NULL;
2206     pcmk_resource_t *parent = NULL;
2207 
2208     crm_trace("looking for %s", rsc_id);
2209     rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2210 
2211     if (rsc == NULL) {
2212         /* If we didn't find the resource by its name in the operation history,
2213          * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2214          * we create a single :0 orphan to match against here.
2215          */
2216         char *clone0_id = clone_zero(rsc_id);
2217         pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
2218                                                    clone0_id);
2219 
2220         if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) {
2221             rsc = clone0;
2222             parent = uber_parent(clone0);
2223             crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2224         } else {
2225             crm_trace("%s is not known as %s either (orphan)",
2226                       rsc_id, clone0_id);
2227         }
2228         free(clone0_id);
2229 
2230     } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
2231         crm_trace("Resource history for %s is orphaned "
2232                   "because it is no longer primitive", rsc_id);
2233         return NULL;
2234 
2235     } else {
2236         parent = uber_parent(rsc);
2237     }
2238 
2239     if (pcmk__is_anonymous_clone(parent)) {
2240 
2241         if (pcmk__is_bundled(parent)) {
2242             rsc = pe__find_bundle_replica(parent->priv->parent, node);
2243         } else {
2244             char *base = clone_strip(rsc_id);
2245 
2246             rsc = find_anonymous_clone(scheduler, node, parent, base);
2247             free(base);
2248             pcmk__assert(rsc != NULL);
2249         }
2250     }
2251 
2252     if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2253         && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
2254 
2255         pcmk__str_update(&(rsc->priv->history_id), rsc_id);
2256         pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2257                         rsc_id, pcmk__node_name(node), rsc->id,
2258                         pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : "");
2259     }
2260     return rsc;
2261 }
2262 
2263 static pcmk_resource_t *
2264 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2265                         pcmk_scheduler_t *scheduler)
2266 {
2267     pcmk_resource_t *rsc = NULL;
2268     const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2269 
2270     crm_debug("Detected orphan resource %s on %s",
2271               rsc_id, pcmk__node_name(node));
2272     rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2273     if (rsc == NULL) {
2274         return NULL;
2275     }
2276 
2277     if (!pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
2278         pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2279 
2280     } else {
2281         CRM_CHECK(rsc != NULL, return NULL);
2282         pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2283         resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2284                           "__orphan_do_not_run__", scheduler);
2285     }
2286     return rsc;
2287 }
2288 
2289 static void
2290 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2291                   enum pcmk__on_fail on_fail)
2292 {
2293     pcmk_node_t *tmpnode = NULL;
2294     char *reason = NULL;
2295     enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
2296     pcmk_scheduler_t *scheduler = NULL;
2297     bool known_active = false;
2298 
2299     pcmk__assert(rsc != NULL);
2300     scheduler = rsc->priv->scheduler;
2301     known_active = (rsc->priv->orig_role > pcmk_role_stopped);
2302     pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2303                     rsc->id, pcmk_role_text(rsc->priv->orig_role),
2304                     pcmk__node_name(node), pcmk__on_fail_text(on_fail));
2305 
2306     /* process current state */
2307     if (rsc->priv->orig_role != pcmk_role_unknown) {
2308         pcmk_resource_t *iter = rsc;
2309 
2310         while (iter) {
2311             if (g_hash_table_lookup(iter->priv->probed_nodes,
2312                                     node->priv->id) == NULL) {
2313                 pcmk_node_t *n = pe__copy_node(node);
2314 
2315                 pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
2316                                 rsc->id,
2317                                 pcmk__s(rsc->priv->history_id, "the same"),
2318                                 pcmk__node_name(n));
2319                 g_hash_table_insert(iter->priv->probed_nodes,
2320                                     (gpointer) n->priv->id, n);
2321             }
2322             if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) {
2323                 break;
2324             }
2325             iter = iter->priv->parent;
2326         }
2327     }
2328 
2329     /* If a managed resource is believed to be running, but node is down ... */
2330     if (known_active && !node->details->online && !node->details->maintenance
2331         && pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
2332 
2333         gboolean should_fence = FALSE;
2334 
2335         /* If this is a guest node, fence it (regardless of whether fencing is
2336          * enabled, because guest node fencing is done by recovery of the
2337          * container resource rather than by the fencer). Mark the resource
2338          * we're processing as failed. When the guest comes back up, its
2339          * operation history in the CIB will be cleared, freeing the affected
2340          * resource to run again once we are sure we know its state.
2341          */
2342         if (pcmk__is_guest_or_bundle_node(node)) {
2343             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2344             should_fence = TRUE;
2345 
2346         } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2347             if (pcmk__is_remote_node(node)
2348                 && (node->priv->remote != NULL)
2349                 && !pcmk_is_set(node->priv->remote->flags,
2350                                 pcmk__rsc_failed)) {
2351 
2352                 /* Setting unseen means that fencing of the remote node will
2353                  * occur only if the connection resource is not going to start
2354                  * somewhere. This allows connection resources on a failed
2355                  * cluster node to move to another node without requiring the
2356                  * remote nodes to be fenced as well.
2357                  */
2358                 pcmk__clear_node_flags(node, pcmk__node_seen);
2359                 reason = crm_strdup_printf("%s is active there (fencing will be"
2360                                            " revoked if remote connection can "
2361                                            "be re-established elsewhere)",
2362                                            rsc->id);
2363             }
2364             should_fence = TRUE;
2365         }
2366 
2367         if (should_fence) {
2368             if (reason == NULL) {
2369                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2370             }
2371             pe_fence_node(scheduler, node, reason, FALSE);
2372         }
2373         free(reason);
2374     }
2375 
2376     /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2377     save_on_fail = on_fail;
2378 
2379     if (node->details->unclean) {
2380         /* No extra processing needed
2381          * Also allows resources to be started again after a node is shot
2382          */
2383         on_fail = pcmk__on_fail_ignore;
2384     }
2385 
2386     switch (on_fail) {
2387         case pcmk__on_fail_ignore:
2388             /* nothing to do */
2389             break;
2390 
2391         case pcmk__on_fail_demote:
2392             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2393             demote_action(rsc, node, FALSE);
2394             break;
2395 
2396         case pcmk__on_fail_fence_node:
2397             /* treat it as if it is still running
2398              * but also mark the node as unclean
2399              */
2400             reason = crm_strdup_printf("%s failed there", rsc->id);
2401             pe_fence_node(scheduler, node, reason, FALSE);
2402             free(reason);
2403             break;
2404 
2405         case pcmk__on_fail_standby_node:
2406             pcmk__set_node_flags(node,
2407                                  pcmk__node_standby|pcmk__node_fail_standby);
2408             break;
2409 
2410         case pcmk__on_fail_block:
2411             /* is_managed == FALSE will prevent any
2412              * actions being sent for the resource
2413              */
2414             pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2415             pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
2416             break;
2417 
2418         case pcmk__on_fail_ban:
2419             /* make sure it comes up somewhere else
2420              * or not at all
2421              */
2422             resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2423                               "__action_migration_auto__", scheduler);
2424             break;
2425 
2426         case pcmk__on_fail_stop:
2427             pe__set_next_role(rsc, pcmk_role_stopped,
2428                               PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2429             break;
2430 
2431         case pcmk__on_fail_restart:
2432             if (known_active) {
2433                 pcmk__set_rsc_flags(rsc,
2434                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2435                 stop_action(rsc, node, FALSE);
2436             }
2437             break;
2438 
2439         case pcmk__on_fail_restart_container:
2440             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2441             if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
2442                 /* A bundle's remote connection can run on a different node than
2443                  * the bundle's container. We don't necessarily know where the
2444                  * container is running yet, so remember it and add a stop
2445                  * action for it later.
2446                  */
2447                 scheduler->priv->stop_needed =
2448                     g_list_prepend(scheduler->priv->stop_needed,
2449                                    rsc->priv->launcher);
2450             } else if (rsc->priv->launcher != NULL) {
2451                 stop_action(rsc->priv->launcher, node, FALSE);
2452             } else if (known_active) {
2453                 stop_action(rsc, node, FALSE);
2454             }
2455             break;
2456 
2457         case pcmk__on_fail_reset_remote:
2458             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2459             if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2460                 tmpnode = NULL;
2461                 if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
2462                     tmpnode = pcmk_find_node(scheduler, rsc->id);
2463                 }
2464                 if (pcmk__is_remote_node(tmpnode)
2465                     && !pcmk_is_set(tmpnode->priv->flags,
2466                                     pcmk__node_remote_fenced)) {
2467                     /* The remote connection resource failed in a way that
2468                      * should result in fencing the remote node.
2469                      */
2470                     pe_fence_node(scheduler, tmpnode,
2471                                   "remote connection is unrecoverable", FALSE);
2472                 }
2473             }
2474 
2475             /* Require the stop action regardless of whether fencing is occurring. */
2476             if (known_active) {
2477                 stop_action(rsc, node, FALSE);
2478             }
2479 
2480             /* if reconnect delay is in use, prevent the connection from exiting the
2481              * "STOPPED" role until the failure is cleared by the delay timeout. */
2482             if (rsc->priv->remote_reconnect_ms > 0U) {
2483                 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2484             }
2485             break;
2486     }
2487 
2488     /* Ensure a remote connection failure forces an unclean Pacemaker Remote
2489      * node to be fenced. By marking the node as seen, the failure will result
2490      * in a fencing operation regardless if we're going to attempt to reconnect
2491      * in this transition.
2492      */
2493     if (pcmk_all_flags_set(rsc->flags,
2494                            pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
2495         tmpnode = pcmk_find_node(scheduler, rsc->id);
2496         if (tmpnode && tmpnode->details->unclean) {
2497             pcmk__set_node_flags(tmpnode, pcmk__node_seen);
2498         }
2499     }
2500 
2501     if (known_active) {
2502         if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
2503             if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
2504                 crm_notice("Removed resource %s is active on %s and will be "
2505                            "stopped when possible",
2506                            rsc->id, pcmk__node_name(node));
2507             } else {
2508                 crm_notice("Removed resource %s must be stopped manually on %s "
2509                            "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
2510                            " is set to false", rsc->id, pcmk__node_name(node));
2511             }
2512         }
2513 
2514         native_add_running(rsc, node, scheduler,
2515                            (save_on_fail != pcmk__on_fail_ignore));
2516         switch (on_fail) {
2517             case pcmk__on_fail_ignore:
2518                 break;
2519             case pcmk__on_fail_demote:
2520             case pcmk__on_fail_block:
2521                 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2522                 break;
2523             default:
2524                 pcmk__set_rsc_flags(rsc,
2525                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2526                 break;
2527         }
2528 
2529     } else if ((rsc->priv->history_id != NULL)
2530                && (strchr(rsc->priv->history_id, ':') != NULL)) {
2531         /* Only do this for older status sections that included instance
2532          * numbers. Otherwise, stopped instances will appear as orphans.
2533          */
2534         pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
2535                         rsc->priv->history_id, rsc->id);
2536         free(rsc->priv->history_id);
2537         rsc->priv->history_id = NULL;
2538 
2539     } else {
2540         GList *possible_matches = pe__resource_actions(rsc, node,
2541                                                        PCMK_ACTION_STOP, FALSE);
2542         GList *gIter = possible_matches;
2543 
2544         for (; gIter != NULL; gIter = gIter->next) {
2545             pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2546 
2547             pcmk__set_action_flags(stop, pcmk__action_optional);
2548         }
2549 
2550         g_list_free(possible_matches);
2551     }
2552 
2553     /* A successful stop after migrate_to on the migration source doesn't make
2554      * the partially migrated resource stopped on the migration target.
2555      */
2556     if ((rsc->priv->orig_role == pcmk_role_stopped)
2557         && (rsc->priv->active_nodes != NULL)
2558         && (rsc->priv->partial_migration_target != NULL)
2559         && pcmk__same_node(rsc->priv->partial_migration_source, node)) {
2560 
2561         rsc->priv->orig_role = pcmk_role_started;
2562     }
2563 }
2564 
2565 /* create active recurring operations as optional */
2566 static void
2567 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2568                   int start_index, int stop_index,
2569                   GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2570 {
2571     int counter = -1;
2572     const char *task = NULL;
2573     const char *status = NULL;
2574     GList *gIter = sorted_op_list;
2575 
2576     pcmk__assert(rsc != NULL);
2577     pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2578                     rsc->id, start_index, stop_index);
2579 
2580     for (; gIter != NULL; gIter = gIter->next) {
2581         xmlNode *rsc_op = (xmlNode *) gIter->data;
2582 
2583         guint interval_ms = 0;
2584         char *key = NULL;
2585         const char *id = pcmk__xe_id(rsc_op);
2586 
2587         counter++;
2588 
2589         if (node->details->online == FALSE) {
2590             pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2591                             rsc->id, pcmk__node_name(node));
2592             break;
2593 
2594             /* Need to check if there's a monitor for role="Stopped" */
2595         } else if (start_index < stop_index && counter <= stop_index) {
2596             pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2597                             id, pcmk__node_name(node));
2598             continue;
2599 
2600         } else if (counter < start_index) {
2601             pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2602                             id, pcmk__node_name(node), counter);
2603             continue;
2604         }
2605 
2606         crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2607         if (interval_ms == 0) {
2608             pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2609                             id, pcmk__node_name(node));
2610             continue;
2611         }
2612 
2613         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2614         if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2615             pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2616                             id, pcmk__node_name(node));
2617             continue;
2618         }
2619         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2620         /* create the action */
2621         key = pcmk__op_key(rsc->id, task, interval_ms);
2622         pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2623         custom_action(rsc, key, task, node, TRUE, scheduler);
2624     }
2625 }
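
     /* For illustration only (not part of the original source): a recurring
      * monitor's history entry that process_recurring() above recreates as an
      * optional action might look roughly like this, assuming the usual CIB
      * attribute names:
      *
      *   <lrm_rsc_op id="myrsc_monitor_10000" operation="monitor"
      *               interval="10000" op-status="0" rc-code="0" ... />
      *
      * Because the interval is nonzero and op-status is not -1, the loop above
      * calls custom_action() with the key "myrsc_monitor_10000".
      */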
2626 
2627 void
2628 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2629                      int *stop_index)
2630 {
2631     int counter = -1;
2632     int implied_monitor_start = -1;
2633     int implied_clone_start = -1;
2634     const char *task = NULL;
2635     const char *status = NULL;
2636 
2637     *stop_index = -1;
2638     *start_index = -1;
2639 
2640     for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2641         const xmlNode *rsc_op = (const xmlNode *) iter->data;
2642 
2643         counter++;
2644 
2645         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2646         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2647 
2648         if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2649             && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2650             *stop_index = counter;
2651 
2652         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2653                                         PCMK_ACTION_MIGRATE_FROM, NULL)) {
2654             *start_index = counter;
2655 
2656         } else if ((implied_monitor_start <= *stop_index)
2657                    && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2658                                    pcmk__str_casei)) {
2659             const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2660 
2661             if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2662                 implied_monitor_start = counter;
2663             }
2664         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2665                                         PCMK_ACTION_DEMOTE, NULL)) {
2666             implied_clone_start = counter;
2667         }
2668     }
2669 
2670     if (*start_index == -1) {
2671         if (implied_clone_start != -1) {
2672             *start_index = implied_clone_start;
2673         } else if (implied_monitor_start != -1) {
2674             *start_index = implied_monitor_start;
2675         }
2676     }
2677 }
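
     /* Illustrative sketch (not part of the original source): given a
      * call-id-sorted history of start, monitor (rc-code 0), stop (op-status 0)
      * for one resource, the loop above yields start_index=0 and stop_index=2,
      * i.e. the most recent event is a successful stop. Only when the history
      * contains no explicit start at all is a successful monitor (rc-code 0 or
      * 8) or a promote/demote used as an implied start instead.
      */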
2678 
2679 // If resource history entry has shutdown lock, remember lock node and time
2680 static void
2681 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2682                      const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2683 {
2684     time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
2685 
2686     if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
2687                                  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2688 
2689         if ((scheduler->priv->shutdown_lock_ms > 0U)
2690             && (get_effective_time(scheduler)
2691                 > (lock_time + pcmk__timeout_ms2s(scheduler->priv->shutdown_lock_ms)))) {
2692             pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2693                            rsc->id, pcmk__node_name(node));
2694             pe__clear_resource_history(rsc, node);
2695         } else {
2696             rsc->priv->lock_node = node;
2697             rsc->priv->lock_time = lock_time;
2698         }
2699     }
2700 }
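
     /* Illustrative sketch (not part of the original source): if a resource's
      * history entry carries a shutdown-lock value of 1700000000 (the epoch
      * time the node shut down) and the shutdown-lock-limit cluster option is
      * 10 minutes, the lock is honored until effective time 1700000600. After
      * that, the lock is treated as expired and the resource's history on the
      * node is cleared, allowing recovery elsewhere.
      */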
2701 
2702 /*!
2703  * \internal
2704  * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2705  *
2706  * \param[in,out] node       Node whose status is being unpacked
2707  * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2708  * \param[in,out] scheduler  Scheduler data
2709  *
2710  * \return Resource corresponding to the entry, or NULL if no operation history
2711  */
2712 static pcmk_resource_t *
2713 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2714                     pcmk_scheduler_t *scheduler)
2715 {
2716     GList *gIter = NULL;
2717     int stop_index = -1;
2718     int start_index = -1;
2719     enum rsc_role_e req_role = pcmk_role_unknown;
2720 
2721     const char *rsc_id = pcmk__xe_id(lrm_resource);
2722 
2723     pcmk_resource_t *rsc = NULL;
2724     GList *op_list = NULL;
2725     GList *sorted_op_list = NULL;
2726 
2727     xmlNode *rsc_op = NULL;
2728     xmlNode *last_failure = NULL;
2729 
2730     enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
2731     enum rsc_role_e saved_role = pcmk_role_unknown;
2732 
2733     if (rsc_id == NULL) {
2734         pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2735                          " entry: No " PCMK_XA_ID);
2736         crm_log_xml_info(lrm_resource, "missing-id");
2737         return NULL;
2738     }
2739     crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2740               rsc_id, pcmk__node_name(node));
2741 
2742     /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2743      * them
2744      */
2745     for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2746                                        NULL);
2747          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
2748 
2749         op_list = g_list_prepend(op_list, rsc_op);
2750     }
2751 
2752     if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2753         if (op_list == NULL) {
2754             // If there are no operations, there is nothing to do
2755             return NULL;
2756         }
2757     }
2758 
2759     /* find the resource */
2760     rsc = unpack_find_resource(scheduler, node, rsc_id);
2761     if (rsc == NULL) {
2762         if (op_list == NULL) {
2763             // If there are no operations, there is nothing to do
2764             return NULL;
2765         } else {
2766             rsc = process_orphan_resource(lrm_resource, node, scheduler);
2767         }
2768     }
2769     pcmk__assert(rsc != NULL);
2770 
2771     // Check whether the resource is "shutdown-locked" to this node
2772     if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2773         unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2774     }
2775 
2776     /* process operations */
2777     saved_role = rsc->priv->orig_role;
2778     rsc->priv->orig_role = pcmk_role_unknown;
2779     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2780 
2781     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2782         xmlNode *rsc_op = (xmlNode *) gIter->data;
2783 
2784         unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2785     }
2786 
2787     /* create active recurring operations as optional */
2788     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2789     process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2790                       scheduler);
2791 
2792     /* no need to free the contents */
2793     g_list_free(sorted_op_list);
2794 
2795     process_rsc_state(rsc, node, on_fail);
2796 
2797     if (get_target_role(rsc, &req_role)) {
2798         if ((rsc->priv->next_role == pcmk_role_unknown)
2799             || (req_role < rsc->priv->next_role)) {
2800 
2801             pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2802 
2803         } else if (req_role > rsc->priv->next_role) {
2804             pcmk__rsc_info(rsc,
2805                            "%s: Not overwriting calculated next role %s"
2806                            " with requested next role %s",
2807                            rsc->id, pcmk_role_text(rsc->priv->next_role),
2808                            pcmk_role_text(req_role));
2809         }
2810     }
2811 
2812     if (saved_role > rsc->priv->orig_role) {
2813         rsc->priv->orig_role = saved_role;
2814     }
2815 
2816     return rsc;
2817 }
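
     /* For illustration only (not part of the original source): a minimal
      * PCMK__XE_LRM_RESOURCE entry as unpacked above might look roughly like
      * this, assuming the usual CIB status layout:
      *
      *   <lrm_resource id="myrsc" class="ocf" provider="pacemaker" type="Dummy">
      *     <lrm_rsc_op id="myrsc_last_0" operation="start" call-id="12"
      *                 rc-code="0" op-status="0" ... />
      *     <lrm_rsc_op id="myrsc_monitor_10000" operation="monitor"
      *                 interval="10000" call-id="13" rc-code="0" op-status="0" ... />
      *   </lrm_resource>
      *
      * The child operations are sorted by call ID, each is fed through
      * unpack_rsc_op(), and the resulting on-fail handling and role are applied
      * via process_rsc_state().
      */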
2818 
2819 static void
2820 handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
2821                                   pcmk_scheduler_t *scheduler)
2822 {
2823     for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
2824                                                          PCMK__XE_LRM_RESOURCE,
2825                                                          NULL, NULL);
2826          rsc_entry != NULL;
2827          rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2828 
2829         pcmk_resource_t *rsc;
2830         pcmk_resource_t *launcher = NULL;
2831         const char *rsc_id;
2832         const char *launcher_id = NULL;
2833 
2834         launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2835         rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2836         if ((launcher_id == NULL) || (rsc_id == NULL)) {
2837             continue;
2838         }
2839 
2840         launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
2841         if (launcher == NULL) {
2842             continue;
2843         }
2844 
2845         rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2846         if ((rsc == NULL) || (rsc->priv->launcher != NULL)
2847             || !pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2848             continue;
2849         }
2850 
2851         pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
2852                         rsc->id, launcher_id);
2853         rsc->priv->launcher = launcher;
2854         launcher->priv->launched = g_list_append(launcher->priv->launched,
2855                                                  rsc);
2856     }
2857 }
2858 
2859 /*!
2860  * \internal
2861  * \brief Unpack one node's lrm status section
2862  *
2863  * \param[in,out] node       Node whose status is being unpacked
2864  * \param[in]     xml        CIB node state XML
2865  * \param[in,out] scheduler  Scheduler data
2866  */
2867 static void
2868 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2869                 pcmk_scheduler_t *scheduler)
2870 {
2871     bool found_removed_launched_resource = false;
2872 
2873     // Drill down to PCMK__XE_LRM_RESOURCES section
2874     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2875     if (xml == NULL) {
2876         return;
2877     }
2878     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2879     if (xml == NULL) {
2880         return;
2881     }
2882 
2883     // Unpack each PCMK__XE_LRM_RESOURCE entry
2884     for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2885                                                          PCMK__XE_LRM_RESOURCE,
2886                                                          NULL, NULL);
2887          rsc_entry != NULL;
2888          rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2889 
2890         pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2891 
2892         if ((rsc != NULL)
2893             && pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2894             found_removed_launched_resource = true;
2895         }
2896     }
2897 
2898     /* Now that all resource state has been unpacked for this node, map any
2899      * removed launched resources to their launchers.
2900      */
2901     if (found_removed_launched_resource) {
2902         handle_removed_launched_resources(xml, scheduler);
2903     }
2904 }
2905 
2906 static void
2907 set_active(pcmk_resource_t *rsc)
2908 {
2909     const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2910 
2911     if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) {
2912         rsc->priv->orig_role = pcmk_role_unpromoted;
2913     } else {
2914         rsc->priv->orig_role = pcmk_role_started;
2915     }
2916 }
2917 
2918 static void
2919 set_node_score(gpointer key, gpointer value, gpointer user_data)
2920 {
2921     pcmk_node_t *node = value;
2922     int *score = user_data;
2923 
2924     node->assign->score = *score;
2925 }
2926 
2927 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2928                          "/" PCMK__XE_NODE_STATE
2929 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM             \
2930                                "/" PCMK__XE_LRM_RESOURCES   \
2931                                "/" PCMK__XE_LRM_RESOURCE
2932 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
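
     /* Illustrative sketch (not part of the original source): with the usual
      * element and attribute names, find_lrm_op() below would assemble an XPath
      * expression along these lines when looking for a migrate_to of "myrsc"
      * recorded on node1 whose migration target was node2 (wrapped here for
      * readability; the generated expression is a single string):
      *
      *   /cib/status/node_state[@uname='node1']
      *       /lrm/lrm_resources/lrm_resource[@id='myrsc']
      *       /lrm_rsc_op[@operation='migrate_to' and @migrate_target='node2']
      */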
2933 
2934 static xmlNode *
2935 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2936             int target_rc, pcmk_scheduler_t *scheduler)
2937 {
2938     GString *xpath = NULL;
2939     xmlNode *xml = NULL;
2940 
2941     CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2942               return NULL);
2943 
2944     xpath = g_string_sized_new(256);
2945     pcmk__g_strcat(xpath,
2946                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2947                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2948                    SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2949                    NULL);
2950 
2951     /* Need to check against transition_magic too? */
2952     if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2953         pcmk__g_strcat(xpath,
2954                        " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2955                        NULL);
2956 
2957     } else if ((source != NULL)
2958                && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2959         pcmk__g_strcat(xpath,
2960                        " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2961                        NULL);
2962     } else {
2963         g_string_append_c(xpath, ']');
2964     }
2965 
2966     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2967                            LOG_DEBUG);
2968     g_string_free(xpath, TRUE);
2969 
2970     if (xml && target_rc >= 0) {
2971         int rc = PCMK_OCF_UNKNOWN_ERROR;
2972         int status = PCMK_EXEC_ERROR;
2973 
2974         crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
2975         crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
2976         if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2977             return NULL;
2978         }
2979     }
2980     return xml;
2981 }
2982 
2983 static xmlNode *
2984 find_lrm_resource(const char *rsc_id, const char *node_name,
2985                   pcmk_scheduler_t *scheduler)
2986 {
2987     GString *xpath = NULL;
2988     xmlNode *xml = NULL;
2989 
2990     CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2991 
2992     xpath = g_string_sized_new(256);
2993     pcmk__g_strcat(xpath,
2994                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
2995                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
2996                    NULL);
2997 
2998     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2999                            LOG_DEBUG);
3000 
3001     g_string_free(xpath, TRUE);
3002     return xml;
3003 }
3004 
3005 /*!
3006  * \internal
3007  * \brief Check whether a resource has no completed action history on a node
3008  *
3009  * \param[in,out] rsc        Resource to check
3010  * \param[in]     node_name  Node to check
3011  *
3012  * \return true if \p rsc is unknown on \p node_name, otherwise false
3013  */
3014 static bool
3015 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3016 {
3017     bool result = false;
3018     xmlXPathObjectPtr search;
3019     char *xpath = NULL;
3020 
3021     xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3022                               SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
3023                               SUB_XPATH_LRM_RSC_OP
3024                               "[@" PCMK__XA_RC_CODE "!='%d']",
3025                               node_name, rsc->id, PCMK_OCF_UNKNOWN);
3026 
3027     search = xpath_search(rsc->priv->scheduler->input, xpath);
3028     result = (numXpathResults(search) == 0);
3029     freeXpathObject(search);
3030     free(xpath);
3031     return result;
3032 }
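
     /* Illustrative note (not part of the original source): the XPath above
      * matches any recorded operation whose rc-code differs from
      * PCMK_OCF_UNKNOWN, i.e. any operation that completed with a real result.
      * If no such entry exists for the resource on the node, its state there is
      * treated as unknown, which callers use to decide that a probe (rather
      * than recorded history) should determine the resource's status.
      */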
3033 
3034 /*!
3035  * \internal
3036  * \brief Check whether a probe/monitor indicating the resource was not running
3037  *        on a node happened after some event
3038  *
3039  * \param[in]     rsc_id     Resource being checked
3040  * \param[in]     node_name  Node being checked
3041  * \param[in]     xml_op     Event that monitor is being compared to
3042  * \param[in,out] scheduler  Scheduler data
3043  *
3044  * \return true if such a monitor happened after the event, false otherwise
3045  */
3046 static bool
3047 monitor_not_running_after(const char *rsc_id, const char *node_name,
3048                           const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3049 {
3050     /* Any probe/monitor operation on the node indicating it was not running
3051      * there
3052      */
3053     xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3054                                    NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3055 
3056     return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
3057 }
3058 
3059 /*!
3060  * \internal
3061  * \brief Check whether any non-monitor operation on a node happened after some
3062  *        event
3063  *
3064  * \param[in]     rsc_id     Resource being checked
3065  * \param[in]     node_name  Node being checked
3066  * \param[in]     xml_op     Event that non-monitor is being compared to
3067  * \param[in,out] scheduler  Scheduler data
3068  *
3069  * \return true if such an operation happened after the event, false otherwise
3070  */
3071 static bool
3072 non_monitor_after(const char *rsc_id, const char *node_name,
3073                   const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3074 {
3075     xmlNode *lrm_resource = NULL;
3076 
3077     lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3078     if (lrm_resource == NULL) {
3079         return false;
3080     }
3081 
3082     for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3083                                             NULL, NULL);
3084          op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {
3085 
3086         const char * task = NULL;
3087 
3088         if (op == xml_op) {
3089             continue;
3090         }
3091 
3092         task = crm_element_value(op, PCMK_XA_OPERATION);
3093 
3094         if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3095                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3096                              NULL)
3097             && pe__is_newer_op(op, xml_op) > 0) {
3098             return true;
3099         }
3100     }
3101 
3102     return false;
3103 }
3104 
3105 /*!
3106  * \internal
3107  * \brief Check whether the resource has newer state on a node after a migration
3108  *        attempt
3109  *
3110  * \param[in]     rsc_id        Resource being checked
3111  * \param[in]     node_name     Node being checked
3112  * \param[in]     migrate_to    Any migrate_to event that is being compared to
3113  * \param[in]     migrate_from  Any migrate_from event that is being compared to
3114  * \param[in,out] scheduler     Scheduler data
3115  *
3116  * \return true if the resource has newer state on the node, false otherwise
3117  */
3118 static bool
3119 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3120                           const xmlNode *migrate_to,
3121                           const xmlNode *migrate_from,
3122                           pcmk_scheduler_t *scheduler)
3123 {
3124     const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
3125     const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
3126 
3127     /* Prefer comparing against the migration event on the same node, if
3128      * one exists, since call IDs are more reliable.
3129      */
3130     if ((xml_op != migrate_to) && (migrate_to != NULL)
3131         && pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3132 
3133         xml_op = migrate_to;
3134     }
3135 
3136     /* If there's any newer non-monitor operation on the node, or any newer
3137      * probe/monitor operation on the node indicating it was not running there,
3138      * the migration events potentially no longer matter for the node.
3139      */
3140     return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
3141            || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
3142 }
3143 
3144 /*!
3145  * \internal
3146  * \brief Parse migration source and target node names from history entry
3147  *
3148  * \param[in]  entry        Resource history entry for a migration action
3149  * \param[in]  source_node  If not NULL, source must match this node
3150  * \param[in]  target_node  If not NULL, target must match this node
3151  * \param[out] source_name  Where to store migration source node name
3152  * \param[out] target_name  Where to store migration target node name
3153  *
3154  * \return Standard Pacemaker return code
3155  */
3156 static int
3157 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3158                          const pcmk_node_t *target_node,
3159                          const char **source_name, const char **target_name)
3160 {
3161     *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3162     *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3163     if ((*source_name == NULL) || (*target_name == NULL)) {
3164         pcmk__config_err("Ignoring resource history entry %s without "
3165                          PCMK__META_MIGRATE_SOURCE " and "
3166                          PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3167         return pcmk_rc_unpack_error;
3168     }
3169 
3170     if ((source_node != NULL)
3171         && !pcmk__str_eq(*source_name, source_node->priv->name,
3172                          pcmk__str_casei|pcmk__str_null_matches)) {
3173         pcmk__config_err("Ignoring resource history entry %s because "
3174                          PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3175                          pcmk__xe_id(entry), *source_name,
3176                          pcmk__node_name(source_node));
3177         return pcmk_rc_unpack_error;
3178     }
3179 
3180     if ((target_node != NULL)
3181         && !pcmk__str_eq(*target_name, target_node->priv->name,
3182                          pcmk__str_casei|pcmk__str_null_matches)) {
3183         pcmk__config_err("Ignoring resource history entry %s because "
3184                          PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3185                          pcmk__xe_id(entry), *target_name,
3186                          pcmk__node_name(target_node));
3187         return pcmk_rc_unpack_error;
3188     }
3189 
3190     return pcmk_rc_ok;
3191 }
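
     /* For illustration only (not part of the original source): a migration
      * history entry records the node pair as attributes, roughly:
      *
      *   <lrm_rsc_op operation="migrate_to"
      *               migrate_source="node1" migrate_target="node2" ... />
      *
      * get_migration_node_names() extracts both names and, when a source or
      * target node object is supplied by the caller, rejects entries that do
      * not match it.
      */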
3192 
3193 /*!
3194  * \internal
3195  * \brief Add a migration source to a resource's list of dangling migrations
3196  *
3197  * If the migrate_to and migrate_from actions in a live migration both
3198  * succeeded, but there is no stop on the source, the migration is considered
3199  * "dangling." Add the source to the resource's dangling migration list, which
3200  * will be used to schedule a stop on the source without affecting the target.
3201  *
3202  * \param[in,out] rsc   Resource involved in migration
3203  * \param[in]     node  Migration source
3204  */
3205 static void
3206 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3207 {
3208     pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3209                     rsc->id, pcmk__node_name(node));
3210     rsc->priv->orig_role = pcmk_role_stopped;
3211     rsc->priv->dangling_migration_sources =
3212         g_list_prepend(rsc->priv->dangling_migration_sources,
3213                        (gpointer) node);
3214 }
3215 
3216 /*!
3217  * \internal
3218  * \brief Update resource role etc. after a successful migrate_to action
3219  *
3220  * \param[in,out] history  Parsed action result history
3221  */
3222 static void
3223 unpack_migrate_to_success(struct action_history *history)
3224 {
3225     /* A complete migration sequence is:
3226      * 1. migrate_to on source node (which succeeded if we get to this function)
3227      * 2. migrate_from on target node
3228      * 3. stop on source node
3229      *
3230      * If no migrate_from has happened, the migration is considered to be
3231      * "partial". If the migrate_from succeeded but no stop has happened, the
3232      * migration is considered to be "dangling".
3233      *
3234      * If a successful migrate_to and stop have happened on the source node, we
3235      * still need to check for a partial migration, due to scenarios (easier to
3236      * produce with batch-limit=1) like:
3237      *
3238      * - A resource is migrating from node1 to node2, and a migrate_to is
3239      *   initiated for it on node1.
3240      *
3241      * - node2 goes into standby mode while the migrate_to is pending, which
3242      *   aborts the transition.
3243      *
3244      * - Upon completion of the migrate_to, a new transition schedules a stop
3245      *   on both nodes and a start on node1.
3246      *
3247      * - If the new transition is aborted for any reason while the resource is
3248      *   stopping on node1, the transition after that stop completes will see
3249      *   the migrate_to and stop on the source, but it's still a partial
3250      *   migration, and the resource must be stopped on node2 because it is
3251      *   potentially active there due to the migrate_to.
3252      *
3253      *   We also need to take into account that either node's history may be
3254      *   cleared at any point in the migration process.
3255      */
3256     int from_rc = PCMK_OCF_OK;
3257     int from_status = PCMK_EXEC_PENDING;
3258     pcmk_node_t *target_node = NULL;
3259     xmlNode *migrate_from = NULL;
3260     const char *source = NULL;
3261     const char *target = NULL;
3262     bool source_newer_op = false;
3263     bool target_newer_state = false;
3264     bool active_on_target = false;
3265     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3266 
3267     // Get source and target node names from XML
3268     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3269                                  &target) != pcmk_rc_ok) {
3270         return;
3271     }
3272 
3273     // Check for newer state on the source
3274     source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3275                                         scheduler);
3276 
3277     // Check for a migrate_from action from this source on the target
3278     migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3279                                target, source, -1, scheduler);
3280     if (migrate_from != NULL) {
3281         if (source_newer_op) {
3282             /* There's a newer non-monitor operation on the source and a
3283              * migrate_from on the target, so this migrate_to is irrelevant to
3284              * the resource's state.
3285              */
3286             return;
3287         }
3288         crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3289         crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3290     }
3291 
3292     /* If the resource has newer state on both the source and target after the
3293      * migration events, this migrate_to is irrelevant to the resource's state.
3294      */
3295     target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3296                                                    history->xml, migrate_from,
3297                                                    scheduler);
3298     if (source_newer_op && target_newer_state) {
3299         return;
3300     }
3301 
3302     /* Check for dangling migration (migrate_from succeeded but stop not done).
3303      * We know there's no stop because we already returned if the target has a
3304      * migrate_from and the source has any newer non-monitor operation.
3305      */
3306     if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3307         add_dangling_migration(history->rsc, history->node);
3308         return;
3309     }
3310 
3311     /* Without newer state, this migrate_to implies the resource is active.
3312      * (Clones are not allowed to migrate, so role can't be promoted.)
3313      */
3314     history->rsc->priv->orig_role = pcmk_role_started;
3315 
3316     target_node = pcmk_find_node(scheduler, target);
3317     active_on_target = !target_newer_state && (target_node != NULL)
3318                        && target_node->details->online;
3319 
3320     if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3321         if (active_on_target) {
3322             native_add_running(history->rsc, target_node, scheduler, TRUE);
3323         } else {
3324             // Mark resource as failed, require recovery, and prevent migration
3325             pcmk__set_rsc_flags(history->rsc,
3326                                 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3327             pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3328         }
3329         return;
3330     }
3331 
3332     // The migrate_from is pending, complete but erased, or to be scheduled
3333 
3334     /* If there is no history at all for the resource on an online target, then
3335      * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3336      * have the probe result, it will be reflected in target_newer_state.
3337      */
3338     if ((target_node != NULL) && target_node->details->online
3339         && unknown_on_node(history->rsc, target)) {
3340         return;
3341     }
3342 
3343     if (active_on_target) {
3344         pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3345 
3346         native_add_running(history->rsc, target_node, scheduler, FALSE);
3347         if ((source_node != NULL) && source_node->details->online) {
3348             /* This is a partial migration: the migrate_to completed
3349              * successfully on the source, but the migrate_from has not
3350              * completed. Remember the source and target; if the newly
3351              * chosen target remains the same when we schedule actions
3352              * later, we may continue with the migration.
3353              */
3354             history->rsc->priv->partial_migration_target = target_node;
3355             history->rsc->priv->partial_migration_source = source_node;
3356         }
3357 
3358     } else if (!source_newer_op) {
3359         // Mark resource as failed, require recovery, and prevent migration
3360         pcmk__set_rsc_flags(history->rsc,
3361                             pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3362         pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3363     }
3364 }
3365 
3366 /*!
3367  * \internal
3368  * \brief Update resource role etc. after a failed migrate_to action
3369  *
3370  * \param[in,out] history  Parsed action result history
3371  */
3372 static void
3373 unpack_migrate_to_failure(struct action_history *history)
3374 {
3375     xmlNode *target_migrate_from = NULL;
3376     const char *source = NULL;
3377     const char *target = NULL;
3378     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3379 
3380     // Get source and target node names from XML
3381     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3382                                  &target) != pcmk_rc_ok) {
3383         return;
3384     }
3385 
3386     /* If a migration failed, we have to assume the resource is active. Clones
3387      * are not allowed to migrate, so role can't be promoted.
3388      */
3389     history->rsc->priv->orig_role = pcmk_role_started;
3390 
3391     // Check for migrate_from on the target
3392     target_migrate_from = find_lrm_op(history->rsc->id,
3393                                       PCMK_ACTION_MIGRATE_FROM, target, source,
3394                                       PCMK_OCF_OK, scheduler);
3395 
3396     if (/* If the resource state is unknown on the target, it will likely be
3397          * probed there.
3398          * Don't just consider it running there. We will get back here anyway in
3399          * case the probe detects it's running there.
3400          */
3401         !unknown_on_node(history->rsc, target)
3402         /* If the resource has newer state on the target after the migration
3403          * events, this migrate_to no longer matters for the target.
3404          */
3405         && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3406                                       target_migrate_from, scheduler)) {
3407         /* The resource has no newer state on the target, so assume that
3408          * it is still active there, as long as the target node is
3409          * online.
3410          */
3411         pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3412 
3413         if (target_node && target_node->details->online) {
3414             native_add_running(history->rsc, target_node, scheduler, FALSE);
3415         }
3416 
3417     } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3418                                   scheduler)) {
3419         /* We know the resource has newer state on the target, but this
3420          * migrate_to still matters for the source as long as there's no newer
3421          * non-monitor operation there.
3422          */
3423 
3424         // Mark node as having dangling migration so we can force a stop later
3425         history->rsc->priv->dangling_migration_sources =
3426             g_list_prepend(history->rsc->priv->dangling_migration_sources,
3427                            (gpointer) history->node);
3428     }
3429 }
3430 
3431 /*!
3432  * \internal
3433  * \brief Update resource role etc. after a failed migrate_from action
3434  *
3435  * \param[in,out] history  Parsed action result history
3436  */
3437 static void
3438 unpack_migrate_from_failure(struct action_history *history)
3439 {
3440     xmlNode *source_migrate_to = NULL;
3441     const char *source = NULL;
3442     const char *target = NULL;
3443     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3444 
3445     // Get source and target node names from XML
3446     if (get_migration_node_names(history->xml, NULL, history->node, &source,
3447                                  &target) != pcmk_rc_ok) {
3448         return;
3449     }
3450 
3451     /* If a migration failed, we have to assume the resource is active. Clones
3452      * are not allowed to migrate, so role can't be promoted.
3453      */
3454     history->rsc->priv->orig_role = pcmk_role_started;
3455 
3456     // Check for a migrate_to on the source
3457     source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3458                                     source, target, PCMK_OCF_OK, scheduler);
3459 
3460     if (/* If the resource state is unknown on the source, it will likely be
3461          * probed there.
3462          * Don't just consider it running there. We will get back here anyway in
3463          * case the probe detects it's running there.
3464          */
3465         !unknown_on_node(history->rsc, source)
3466         /* If the resource has newer state on the source after the migration
3467          * events, this migrate_from no longer matters for the source.
3468          */
3469         && !newer_state_after_migrate(history->rsc->id, source,
3470                                       source_migrate_to, history->xml,
3471                                       scheduler)) {
3472         /* The resource has no newer state on the source, so assume it's still
3473          * active there (if it is up).
3474          */
3475         pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3476 
3477         if (source_node && source_node->details->online) {
3478             native_add_running(history->rsc, source_node, scheduler, TRUE);
3479         }
3480     }
3481 }
3482 
3483 /*!
3484  * \internal
3485  * \brief Add an action to cluster's list of failed actions
3486  *
3487  * \param[in,out] history  Parsed action result history
3488  */
3489 static void
3490 record_failed_op(struct action_history *history)
3491 {
3492     const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3493 
3494     if (!(history->node->details->online)) {
3495         return;
3496     }
3497 
3498     for (const xmlNode *xIter = scheduler->priv->failed->children;
3499          xIter != NULL; xIter = xIter->next) {
3500 
3501         const char *key = pcmk__xe_history_key(xIter);
3502         const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3503 
3504         if (pcmk__str_eq(history->key, key, pcmk__str_none)
3505             && pcmk__str_eq(uname, history->node->priv->name,
3506                             pcmk__str_casei)) {
3507             crm_trace("Skipping duplicate entry %s on %s",
3508                       history->key, pcmk__node_name(history->node));
3509             return;
3510         }
3511     }
3512 
3513     crm_trace("Adding entry for %s on %s to failed action list",
3514               history->key, pcmk__node_name(history->node));
3515     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3516     crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3517     pcmk__xml_copy(scheduler->priv->failed, history->xml);
3518 }
3519 
3520 static char *
3521 last_change_str(const xmlNode *xml_op)
3522 {
3523     time_t when;
3524     char *result = NULL;
3525 
3526     if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
3527                                 &when) == pcmk_ok) {
3528         char *when_s = pcmk__epoch2str(&when, 0);
3529         const char *p = strchr(when_s, ' ');
3530 
3531         // Skip day of week to make message shorter
3532         if ((p != NULL) && (*(++p) != '\0')) {
3533             result = pcmk__str_copy(p);
3534         }
3535         free(when_s);
3536     }
3537 
3538     if (result == NULL) {
3539         result = pcmk__str_copy("unknown_time");
3540     }
3541 
3542     return result;
3543 }
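
     /* Illustrative sketch (not part of the original source): if the
      * PCMK_XA_LAST_RC_CHANGE epoch formats to a ctime-like string such as
      * "Mon Jan  6 10:20:30 2025", the code above drops everything up to the
      * first space, so log messages show "Jan  6 10:20:30 2025".
      */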
3544 
3545 /*!
3546  * \internal
3547  * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3548  *
3549  * \param[in,out] rsc  Resource to ban
3550  */
3551 static void
3552 ban_from_all_nodes(pcmk_resource_t *rsc)
3553 {
3554     int score = -PCMK_SCORE_INFINITY;
3555     const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3556 
3557     if (rsc->priv->parent != NULL) {
3558         pcmk_resource_t *parent = uber_parent(rsc);
3559 
3560         if (pcmk__is_anonymous_clone(parent)) {
3561             /* For anonymous clones, if an operation with
3562              * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3563              * entire clone must stop.
3564              */
3565             rsc = parent;
3566         }
3567     }
3568 
3569     // Ban the resource from all nodes
3570     crm_notice("%s will not be started under current conditions", rsc->id);
3571     if (rsc->priv->allowed_nodes != NULL) {
3572         g_hash_table_destroy(rsc->priv->allowed_nodes);
3573     }
3574     rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
3575     g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3576 }
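
     /* Illustrative note (not part of the original source): replacing the
      * allowed-node table and scoring every entry at -INFINITY has the same
      * effect as a location constraint banning the resource (or its anonymous
      * clone parent) from every node currently known to the cluster.
      */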
3577 
3578 /*!
3579  * \internal
3580  * \brief Get configured failure handling and role after failure for an action
3581  *
3582  * \param[in,out] history    Unpacked action history entry
3583  * \param[out]    on_fail    Where to set configured failure handling
3584  * \param[out]    fail_role  Where to set role after failure
3585  */
3586 static void
3587 unpack_failure_handling(struct action_history *history,
3588                         enum pcmk__on_fail *on_fail,
3589                         enum rsc_role_e *fail_role)
3590 {
3591     xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3592                                                history->interval_ms, true);
3593 
3594     GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3595                                                 history->task,
3596                                                 history->interval_ms, config);
3597 
3598     const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3599 
3600     *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3601                                    history->interval_ms, on_fail_str);
3602     *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3603                                           meta);
3604     g_hash_table_destroy(meta);
3605 }
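
     /* Illustrative sketch (not part of the original source): if the matching
      * operation configuration sets PCMK_META_ON_FAIL to "block", this function
      * reports pcmk__on_fail_block in *on_fail. With nothing configured, the
      * parsed default depends on the action; for example, a failed stop is
      * normally escalated to fencing when fencing is enabled.
      */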
3606 
3607 /*!
3608  * \internal
3609  * \brief Update resource role, failure handling, etc., after a failed action
3610  *
3611  * \param[in,out] history         Parsed action result history
3612  * \param[in]     config_on_fail  Action failure handling from configuration
3613  * \param[in]     fail_role       Resource's role after failure of this action
3614  * \param[out]    last_failure    This will be set to the history XML
3615  * \param[in,out] on_fail         Actual handling of action result
3616  */
3617 static void
3618 unpack_rsc_op_failure(struct action_history *history,
3619                       enum pcmk__on_fail config_on_fail,
3620                       enum rsc_role_e fail_role, xmlNode **last_failure,
3621                       enum pcmk__on_fail *on_fail)
3622 {
3623     bool is_probe = false;
3624     char *last_change_s = NULL;
3625     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3626 
3627     *last_failure = history->xml;
3628 
3629     is_probe = pcmk_xe_is_probe(history->xml);
3630     last_change_s = last_change_str(history->xml);
3631 
3632     if (!pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
3633         && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3634         crm_trace("Unexpected result (%s%s%s) was recorded for "
3635                   "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3636                   crm_exit_str(history->exit_status),
3637                   (pcmk__str_empty(history->exit_reason)? "" : ": "),
3638                   pcmk__s(history->exit_reason, ""),
3639                   (is_probe? "probe" : history->task), history->rsc->id,
3640                   pcmk__node_name(history->node), last_change_s,
3641                   history->exit_status, history->id);
3642     } else {
3643         pcmk__sched_warn(scheduler,
3644                          "Unexpected result (%s%s%s) was recorded for %s of "
3645                          "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3646                          crm_exit_str(history->exit_status),
3647                          (pcmk__str_empty(history->exit_reason)? "" : ": "),
3648                          pcmk__s(history->exit_reason, ""),
3649                          (is_probe? "probe" : history->task), history->rsc->id,
3650                          pcmk__node_name(history->node), last_change_s,
3651                          history->exit_status, history->id);
3652 
3653         if (is_probe && (history->exit_status != PCMK_OCF_OK)
3654             && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3655             && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3656 
3657             /* A failed (not just unexpected) probe result could mean the user
3658              * didn't know resources will be probed even where they can't run.
3659              */
3660             crm_notice("If it is not possible for %s to run on %s, see "
3661                        "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3662                        "constraints",
3663                        history->rsc->id, pcmk__node_name(history->node));
3664         }
3665 
3666         record_failed_op(history);
3667     }
3668 
3669     free(last_change_s);
3670 
3671     if (*on_fail < config_on_fail) {
3672         pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3673                         pcmk__on_fail_text(*on_fail),
3674                         pcmk__on_fail_text(config_on_fail), history->key);
3675         *on_fail = config_on_fail;
3676     }
3677 
3678     if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3679         resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3680                           "__stop_fail__", scheduler);
3681 
3682     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3683         unpack_migrate_to_failure(history);
3684 
3685     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3686         unpack_migrate_from_failure(history);
3687 
3688     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3689         history->rsc->priv->orig_role = pcmk_role_promoted;
3690 
3691     } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3692         if (config_on_fail == pcmk__on_fail_block) {
3693             history->rsc->priv->orig_role = pcmk_role_promoted;
3694             pe__set_next_role(history->rsc, pcmk_role_stopped,
3695                               "demote with " PCMK_META_ON_FAIL "=block");
3696 
3697         } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3698             history->rsc->priv->orig_role = pcmk_role_stopped;
3699 
3700         } else {
3701             /* Staying in the promoted role would put the scheduler and
3702              * controller into a loop. Setting the role to unpromoted is not
3703              * dangerous because the resource will be stopped as part of
3704              * recovery, and any promotion will be ordered after that stop.
3705              */
3706             history->rsc->priv->orig_role = pcmk_role_unpromoted;
3707         }
3708     }
3709 
3710     if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3711         /* leave stopped */
3712         pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3713         history->rsc->priv->orig_role = pcmk_role_stopped;
3714 
3715     } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3716         pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3717         set_active(history->rsc);
3718     }
3719 
3720     pcmk__rsc_trace(history->rsc,
3721                     "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3722                     history->rsc->id,
3723                     pcmk_role_text(history->rsc->priv->orig_role),
3724                     pcmk__btoa(history->node->details->unclean),
3725                     pcmk__on_fail_text(config_on_fail),
3726                     pcmk_role_text(fail_role));
3727 
3728     if ((fail_role != pcmk_role_started)
3729         && (history->rsc->priv->next_role < fail_role)) {
3730         pe__set_next_role(history->rsc, fail_role, "failure");
3731     }
3732 
3733     if (fail_role == pcmk_role_stopped) {
3734         ban_from_all_nodes(history->rsc);
3735     }
3736 }
3737 
3738 /*!
3739  * \internal
3740  * \brief Block a resource with a failed action if it cannot be recovered
3741  *
3742  * If a resource action is a failed stop and fencing is not possible, mark the
3743  * resource as unmanaged and blocked, since recovery cannot be done.
3744  *
3745  * \param[in,out] history  Parsed action history entry
3746  */
3747 static void
3748 block_if_unrecoverable(struct action_history *history)
3749 {
3750     char *last_change_s = NULL;
3751 
3752     if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3753         return; // All actions besides stop are always recoverable
3754     }
3755     if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3756         return; // Failed stops are recoverable via fencing
3757     }
3758 
3759     last_change_s = last_change_str(history->xml);
3760     pcmk__sched_err(history->node->priv->scheduler,
3761                     "No further recovery can be attempted for %s "
3762                     "because %s on %s failed (%s%s%s) at %s "
3763                     QB_XS " rc=%d id=%s",
3764                     history->rsc->id, history->task,
3765                     pcmk__node_name(history->node),
3766                     crm_exit_str(history->exit_status),
3767                     (pcmk__str_empty(history->exit_reason)? "" : ": "),
3768                     pcmk__s(history->exit_reason, ""),
3769                     last_change_s, history->exit_status, history->id);
3770 
3771     free(last_change_s);
3772 
3773     pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
3774     pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3775 }
3776 
3777 /*!
3778  * \internal
3779  * \brief Update action history's execution status and the reason why
3780  *
3781  * \param[in,out] history  Parsed action history entry
3782  * \param[out]    why      Where to store reason for update
3783  * \param[in]     value    New value
3784  * \param[in]     reason   Description of why value was changed
3785  */
3786 static inline void
3787 remap_because(struct action_history *history, const char **why, int value,
3788               const char *reason)
3789 {
3790     if (history->execution_status != value) {
3791         history->execution_status = value;
3792         *why = reason;
3793     }
3794 }
3795 
3796 /*!
3797  * \internal
3798  * \brief Remap informational monitor results and operation status
3799  *
3800  * For monitor results, certain OCF codes provide extended information to
3801  * the user about services that are not failed yet but are not entirely
3802  * healthy either. Pacemaker must treat these as the "normal" result.
3803  *
3804  * For operation status, the action result can be used to determine an
3805  * appropriate status for responding to the action, because the status
3806  * reported by the executor does not reflect what result was expected.
3807  *
3808  * \param[in,out] history  Parsed action history entry
3809  * \param[in,out] on_fail  What should be done about the result
3810  * \param[in]     expired  Whether result is expired
3811  *
3812  * \note If the result is remapped and the node is not shutting down or failed,
3813  *       the operation will be recorded in the scheduler data's list of failed
3814  *       operations to highlight it for the user.
3815  *
3816  * \note This may update the resource's current and next role.
3817  */
3818 static void
3819 remap_operation(struct action_history *history,
3820                 enum pcmk__on_fail *on_fail, bool expired)
3821 {
3822     bool is_probe = false;
3823     int orig_exit_status = history->exit_status;
3824     int orig_exec_status = history->execution_status;
3825     const char *why = NULL;
3826     const char *task = history->task;
3827 
3828     // Remap degraded results to their successful counterparts
3829     history->exit_status = pcmk__effective_rc(history->exit_status);
3830     if (history->exit_status != orig_exit_status) {
3831         why = "degraded result";
3832         if (!expired && (!history->node->details->shutdown
3833                          || history->node->details->online)) {
3834             record_failed_op(history);
3835         }
3836     }
3837 
3838     if (!pcmk__is_bundled(history->rsc)
3839         && pcmk_xe_mask_probe_failure(history->xml)
3840         && ((history->execution_status != PCMK_EXEC_DONE)
3841             || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3842         history->execution_status = PCMK_EXEC_DONE;
3843         history->exit_status = PCMK_OCF_NOT_RUNNING;
3844         why = "equivalent probe result";
3845     }
3846 
3847     /* If the executor reported an execution status of anything but done or
3848      * error, consider that final. But for done or error, we know better whether
3849      * it should be treated as a failure or not, because we know the expected
3850      * result.
3851      */
3852     switch (history->execution_status) {
3853         case PCMK_EXEC_DONE:
3854         case PCMK_EXEC_ERROR:
3855             break;
3856 
3857         // These should be treated as node-fatal
3858         case PCMK_EXEC_NO_FENCE_DEVICE:
3859         case PCMK_EXEC_NO_SECRETS:
3860             remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3861                           "node-fatal error");
3862             goto remap_done;
3863 
3864         default:
3865             goto remap_done;
3866     }
3867 
3868     is_probe = pcmk_xe_is_probe(history->xml);
3869     if (is_probe) {
3870         task = "probe";
3871     }
3872 
3873     if (history->expected_exit_status < 0) {
3874         /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3875          * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3876          * expected exit status in the transition key, which (along with the
3877          * similar case of a corrupted transition key in the CIB) will be
3878          * reported to this function as -1. Pacemaker 2.0+ does not support
3879          * rolling upgrades from those versions or processing of saved CIB files
3880          * from those versions, so we do not need to care much about this case.
3881          */
3882         remap_because(history, &why, PCMK_EXEC_ERROR,
3883                       "obsolete history format");
3884         pcmk__config_warn("Expected result not found for %s on %s "
3885                           "(corrupt or obsolete CIB?)",
3886                           history->key, pcmk__node_name(history->node));
3887 
3888     } else if (history->exit_status == history->expected_exit_status) {
3889         remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3890 
3891     } else {
3892         remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3893         pcmk__rsc_debug(history->rsc,
3894                         "%s on %s: expected %d (%s), got %d (%s%s%s)",
3895                         history->key, pcmk__node_name(history->node),
3896                         history->expected_exit_status,
3897                         crm_exit_str(history->expected_exit_status),
3898                         history->exit_status,
3899                         crm_exit_str(history->exit_status),
3900                         (pcmk__str_empty(history->exit_reason)? "" : ": "),
3901                         pcmk__s(history->exit_reason, ""));
3902     }
3903 
3904     switch (history->exit_status) {
3905         case PCMK_OCF_OK:
3906             if (is_probe
3907                 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3908                 char *last_change_s = last_change_str(history->xml);
3909 
3910                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3911                 pcmk__rsc_info(history->rsc,
3912                                "Probe found %s active on %s at %s",
3913                                history->rsc->id, pcmk__node_name(history->node),
3914                                last_change_s);
3915                 free(last_change_s);
3916             }
3917             break;
3918 
3919         case PCMK_OCF_NOT_RUNNING:
3920             if (is_probe
3921                 || (history->expected_exit_status == history->exit_status)
3922                 || !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) {
3923 
3924                 /* For probes, recurring monitors for the Stopped role, and
3925                  * unmanaged resources, "not running" is not considered a
3926                  * failure.
3927                  */
3928                 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3929                 history->rsc->priv->orig_role = pcmk_role_stopped;
3930                 *on_fail = pcmk__on_fail_ignore;
3931                 pe__set_next_role(history->rsc, pcmk_role_unknown,
3932                                   "not running");
3933             }
3934             break;
3935 
3936         case PCMK_OCF_RUNNING_PROMOTED:
3937             if (is_probe
3938                 && (history->exit_status != history->expected_exit_status)) {
3939                 char *last_change_s = last_change_str(history->xml);
3940 
3941                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3942                 pcmk__rsc_info(history->rsc,
3943                                "Probe found %s active and promoted on %s at %s",
3944                                 history->rsc->id,
3945                                 pcmk__node_name(history->node), last_change_s);
3946                 free(last_change_s);
3947             }
3948             if (!expired
3949                 || (history->exit_status == history->expected_exit_status)) {
3950                 history->rsc->priv->orig_role = pcmk_role_promoted;
3951             }
3952             break;
3953 
3954         case PCMK_OCF_FAILED_PROMOTED:
3955             if (!expired) {
3956                 history->rsc->priv->orig_role = pcmk_role_promoted;
3957             }
3958             remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
3959             break;
3960 
3961         case PCMK_OCF_NOT_CONFIGURED:
3962             remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
3963             break;
3964 
3965         case PCMK_OCF_UNIMPLEMENT_FEATURE:
3966             {
3967                 guint interval_ms = 0;
3968                 crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
3969                                      &interval_ms);
3970 
3971                 if (interval_ms == 0) {
3972                     if (!expired) {
3973                         block_if_unrecoverable(history);
3974                     }
3975                     remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3976                                   "exit status");
3977                 } else {
3978                     remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
3979                                   "exit status");
3980                 }
3981             }
3982             break;
3983 
3984         case PCMK_OCF_NOT_INSTALLED:
3985         case PCMK_OCF_INVALID_PARAM:
3986         case PCMK_OCF_INSUFFICIENT_PRIV:
3987             if (!expired) {
3988                 block_if_unrecoverable(history);
3989             }
3990             remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
3991             break;
3992 
3993         default:
3994             if (history->execution_status == PCMK_EXEC_DONE) {
3995                 char *last_change_s = last_change_str(history->xml);
3996 
3997                 crm_info("Treating unknown exit status %d from %s of %s "
3998                          "on %s at %s as failure",
3999                          history->exit_status, task, history->rsc->id,
4000                          pcmk__node_name(history->node), last_change_s);
4001                 remap_because(history, &why, PCMK_EXEC_ERROR,
4002                               "unknown exit status");
4003                 free(last_change_s);
4004             }
4005             break;
4006     }
4007 
4008 remap_done:
4009     if (why != NULL) {
4010         pcmk__rsc_trace(history->rsc,
4011                         "Remapped %s result from [%s: %s] to [%s: %s] "
4012                         "because of %s",
4013                         history->key, pcmk_exec_status_str(orig_exec_status),
4014                         crm_exit_str(orig_exit_status),
4015                         pcmk_exec_status_str(history->execution_status),
4016                         crm_exit_str(history->exit_status), why);
4017     }
4018 }
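
/* Worked example (added commentary, not part of the original source): for a
 * recurring monitor whose expected result is PCMK_OCF_OK, a reported
 * PCMK_OCF_DEGRADED is first remapped to PCMK_OCF_OK by the degraded-result
 * step (and recorded as a failed op for visibility), then to
 * [PCMK_EXEC_DONE: OK] by the "expected result" branch. An unexpected
 * PCMK_OCF_NOT_RUNNING from the same monitor of a managed resource instead
 * ends up as [PCMK_EXEC_ERROR: not running] via the "unexpected result"
 * branch.
 */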
4019 
4020 // Return TRUE if this is a last failure of a start or monitor whose parameters have since changed
4021 static bool
4022 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4023                               pcmk_resource_t *rsc, pcmk_node_t *node)
4024 {
4025     if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4026         if (pe__bundle_needs_remote_name(rsc)) {
4027             /* We haven't allocated resources yet, so we can't reliably
4028              * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4029              * When that's needed, defer the check until later.
4030              */
4031             pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4032                                 rsc->priv->scheduler);
4033 
4034         } else {
4035             pcmk__op_digest_t *digest_data = NULL;
4036 
4037             digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4038                                                 rsc->priv->scheduler);
4039             switch (digest_data->rc) {
4040                 case pcmk__digest_unknown:
4041                     crm_trace("Resource %s history entry %s on %s"
4042                               " has no digest to compare",
4043                               rsc->id, pcmk__xe_history_key(xml_op),
4044                               node->priv->id);
4045                     break;
4046                 case pcmk__digest_match:
4047                     break;
4048                 default:
4049                     return TRUE;
4050             }
4051         }
4052     }
4053     return FALSE;
4054 }
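
/* Illustrative usage (added commentary, not part of the original source):
 * check_operation_expiry() below applies this to a resource's last_failure
 * entry, roughly as:
 *
 *     if (is_last_failure
 *         && should_clear_for_param_change(history->xml, history->task,
 *                                          history->rsc, history->node)) {
 *         clear_reason = "resource parameters have changed";
 *     }
 */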
4055 
4056 // Order action after fencing of remote node, given connection rsc
4057 static void
4058 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4059                            pcmk_scheduler_t *scheduler)
4060 {
4061     pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4062 
4063     if (remote_node) {
4064         pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4065                                            FALSE, scheduler);
4066 
4067         order_actions(fence, action, pcmk__ar_first_implies_then);
4068     }
4069 }
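
/* Illustrative usage (added commentary, not part of the original source):
 * when a remote connection's failure is cleared because a reconnect interval
 * is set, check_operation_expiry() below orders the clearing after any
 * scheduled fencing of the remote node:
 *
 *     order_after_remote_fencing(clear_op, history->rsc, scheduler);
 */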
4070 
4071 static bool
4072 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4073                               guint interval_ms, bool is_last_failure)
4074 {
4075     /* Clearing failures of recurring monitors has special concerns. The
4076      * executor reports only changes in the monitor result, so if the
4077      * monitor is still active and still getting the same failure result,
4078      * that will go undetected after the failure is cleared.
4079      *
4080      * Also, the operation history will have the time when the recurring
4081      * monitor result changed to the given code, not the time when the
4082      * result last happened.
4083      *
4084      * @TODO We probably should clear such failures only when the failure
4085      * timeout has passed since the last occurrence of the failed result.
4086      * However we don't record that information. We could maybe approximate
4087      * that by clearing only if there is a more recent successful monitor or
4088      * stop result, but we don't even have that information at this point
4089      * since we are still unpacking the resource's operation history.
4090      *
4091      * This is especially important for remote connection resources with a
4092      * reconnect interval, so in that case, we skip clearing failures
4093      * if the remote node hasn't been fenced.
4094      */
4095     if ((rsc->priv->remote_reconnect_ms > 0U)
4096         && pcmk_is_set(rsc->priv->scheduler->flags,
4097                        pcmk__sched_fencing_enabled)
4098         && (interval_ms != 0)
4099         && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4100 
4101         pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4102                                                   rsc->id);
4103 
4104         if (remote_node && !pcmk_is_set(remote_node->priv->flags,
4105                                         pcmk__node_remote_fenced)) {
4106             if (is_last_failure) {
4107                 crm_info("Waiting to clear monitor failure for remote node %s"
4108                          " until fencing has occurred", rsc->id);
4109             }
4110             return TRUE;
4111         }
4112     }
4113     return FALSE;
4114 }
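
/* Worked example (added commentary, not part of the original source): for a
 * remote connection resource with a reconnect interval whose recurring
 * monitor has failed (and with fencing enabled), this returns TRUE until the
 * remote node has been fenced. Clearing the failure any earlier would go
 * unnoticed by the executor, which reports only changes in recurring monitor
 * results, so the same failure could silently persist.
 */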
4115 
4116 /*!
4117  * \internal
4118  * \brief Check operation age and schedule failure clearing when appropriate
4119  *
4120  * This function has two distinct purposes. The first is to check whether an
4121  * operation history entry is expired (i.e. the resource has a failure timeout,
4122  * the entry is older than the timeout, and the resource either has no fail
4123  * count or its fail count is entirely older than the timeout). The second is to
4124  * schedule fail count clearing when appropriate (i.e. the operation is expired
4125  * and either the resource has an expired fail count or the operation is a
4126  * last_failure for a remote connection resource with a reconnect interval,
4127  * or the operation is a last_failure for a start or monitor operation and the
4128  * resource's parameters have changed since the operation).
4129  *
4130  * \param[in,out] history  Parsed action result history
4131  *
4132  * \return true if operation history entry is expired, otherwise false
4133  */
4134 static bool
4135 check_operation_expiry(struct action_history *history)
4136 {
4137     bool expired = false;
4138     bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4139     time_t last_run = 0;
4140     int unexpired_fail_count = 0;
4141     const char *clear_reason = NULL;
4142     const guint expiration_sec =
4143         pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4144     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4145 
4146     if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4147         pcmk__rsc_trace(history->rsc,
4148                         "Resource history entry %s on %s is not expired: "
4149                         "Not Installed does not expire",
4150                         history->id, pcmk__node_name(history->node));
4151         return false; // "Not installed" must always be cleared manually
4152     }
4153 
4154     if ((expiration_sec > 0)
4155         && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
4156                                     &last_run) == 0)) {
4157 
4158         /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4159          * timestamp
4160          */
4161 
4162         time_t now = get_effective_time(scheduler);
4163         time_t last_failure = 0;
4164 
4165         // Is this particular operation history older than the failure timeout?
4166         if ((now >= (last_run + expiration_sec))
4167             && !should_ignore_failure_timeout(history->rsc, history->task,
4168                                               history->interval_ms,
4169                                               is_last_failure)) {
4170             expired = true;
4171         }
4172 
4173         // Does the resource as a whole have an unexpired fail count?
4174         unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4175                                                 &last_failure,
4176                                                 pcmk__fc_effective,
4177                                                 history->xml);
4178 
4179         // Update scheduler recheck time according to *last* failure
4180         crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4181                   "expiration=%s last-failure@%lld",
4182                   history->id, (long long) last_run, (expired? "" : "not "),
4183                   (long long) now, unexpired_fail_count,
4184                   pcmk__readable_interval(expiration_sec * 1000),
4185                   (long long) last_failure);
4186         last_failure += expiration_sec + 1;
4187         if (unexpired_fail_count && (now < last_failure)) {
4188             pe__update_recheck_time(last_failure, scheduler,
4189                                     "fail count expiration");
4190         }
4191     }
4192 
4193     if (expired) {
4194         if (pe_get_failcount(history->node, history->rsc, NULL,
4195                              pcmk__fc_default, history->xml)) {
4196             // There is a fail count ignoring timeout
4197 
4198             if (unexpired_fail_count == 0) {
4199                 // There is no fail count considering timeout
4200                 clear_reason = "it expired";
4201 
4202             } else {
4203                 /* This operation is old, but there is an unexpired fail count.
4204                  * In a properly functioning cluster, this should only be
4205                  * possible if this operation is not a failure (otherwise the
4206                  * fail count should be expired too), so this is really just a
4207                  * failsafe.
4208                  */
4209                 pcmk__rsc_trace(history->rsc,
4210                                 "Resource history entry %s on %s is not "
4211                                 "expired: Unexpired fail count",
4212                                 history->id, pcmk__node_name(history->node));
4213                 expired = false;
4214             }
4215 
4216         } else if (is_last_failure
4217                    && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4218             /* Clear any expired last failure when reconnect interval is set,
4219              * even if there is no fail count.
4220              */
4221             clear_reason = "reconnect interval is set";
4222         }
4223     }
4224 
4225     if (!expired && is_last_failure
4226         && should_clear_for_param_change(history->xml, history->task,
4227                                          history->rsc, history->node)) {
4228         clear_reason = "resource parameters have changed";
4229     }
4230 
4231     if (clear_reason != NULL) {
4232         pcmk_action_t *clear_op = NULL;
4233 
4234         // Schedule clearing of the fail count
4235         clear_op = pe__clear_failcount(history->rsc, history->node,
4236                                        clear_reason, scheduler);
4237 
4238         if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4239             && (history->rsc->priv->remote_reconnect_ms > 0)) {
4240             /* If we're clearing a remote connection due to a reconnect
4241              * interval, we want to wait until any scheduled fencing
4242              * completes.
4243              *
4244              * We could limit this to remote_node->details->unclean, but at
4245              * this point, that's always true (it won't be reliable until
4246              * after unpack_node_history() is done).
4247              */
4248             crm_info("Clearing %s failure will wait until any scheduled "
4249                      "fencing of %s completes",
4250                      history->task, history->rsc->id);
4251             order_after_remote_fencing(clear_op, history->rsc, scheduler);
4252         }
4253     }
4254 
4255     if (expired && (history->interval_ms == 0)
4256         && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4257         switch (history->exit_status) {
4258             case PCMK_OCF_OK:
4259             case PCMK_OCF_NOT_RUNNING:
4260             case PCMK_OCF_RUNNING_PROMOTED:
4261             case PCMK_OCF_DEGRADED:
4262             case PCMK_OCF_DEGRADED_PROMOTED:
4263                 // Don't expire probes that return these values
4264                 pcmk__rsc_trace(history->rsc,
4265                                 "Resource history entry %s on %s is not "
4266                                 "expired: Probe result",
4267                              history->id, pcmk__node_name(history->node));
4268                 expired = false;
4269                 break;
4270         }
4271     }
4272 
4273     return expired;
4274 }
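
/* Worked example (added commentary, not part of the original source): with
 * PCMK_META_FAILURE_TIMEOUT set to 60s (expiration_sec == 60) and a history
 * entry whose PCMK_XA_LAST_RC_CHANGE is t=100, the entry is considered
 * expired once the effective cluster time reaches t=160, provided
 * should_ignore_failure_timeout() does not apply and the resource has no
 * unexpired fail count. If an unexpired fail count remains, the scheduler's
 * recheck time is instead pushed to (last failure + 60 + 1) so the cluster
 * re-evaluates once the fail count itself expires.
 */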
4275 
4276 int
4277 pe__target_rc_from_xml(const xmlNode *xml_op)
4278 {
4279     int target_rc = 0;
4280     const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4281 
4282     if (key == NULL) {
4283         return -1;
4284     }
4285     decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4286     return target_rc;
4287 }
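
/* Illustrative usage (added commentary, not part of the original source):
 * unpack_rsc_op() below records the expected result of a history entry with
 *
 *     history.expected_exit_status = pe__target_rc_from_xml(xml_op);
 *
 * A return value of -1 (transition key missing or corrupt) is treated by
 * remap_operation() as an obsolete or corrupt history format.
 */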
4288 
4289 /*!
4290  * \internal
4291  * \brief Update a resource's state for an action result
4292  *
4293  * \param[in,out] history       Parsed action history entry
4294  * \param[in]     exit_status   Exit status to base new state on
4295  * \param[in]     last_failure  Resource's last_failure entry, if known
4296  * \param[in,out] on_fail       Resource's current failure handling
4297  */
4298 static void
4299 update_resource_state(struct action_history *history, int exit_status,
4300                       const xmlNode *last_failure,
4301                       enum pcmk__on_fail *on_fail)
4302 {
4303     bool clear_past_failure = false;
4304 
4305     if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4306         || (!pcmk__is_bundled(history->rsc)
4307             && pcmk_xe_mask_probe_failure(history->xml))) {
4308         history->rsc->priv->orig_role = pcmk_role_stopped;
4309 
4310     } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4311         clear_past_failure = true;
4312 
4313     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4314                             pcmk__str_none)) {
4315         if ((last_failure != NULL)
4316             && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4317                             pcmk__str_none)) {
4318             clear_past_failure = true;
4319         }
4320         if (history->rsc->priv->orig_role < pcmk_role_started) {
4321             set_active(history->rsc);
4322         }
4323 
4324     } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4325         history->rsc->priv->orig_role = pcmk_role_started;
4326         clear_past_failure = true;
4327 
4328     } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4329         history->rsc->priv->orig_role = pcmk_role_stopped;
4330         clear_past_failure = true;
4331 
4332     } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4333                             pcmk__str_none)) {
4334         history->rsc->priv->orig_role = pcmk_role_promoted;
4335         clear_past_failure = true;
4336 
4337     } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4338                             pcmk__str_none)) {
4339         if (*on_fail == pcmk__on_fail_demote) {
4340             /* Demote clears an error only if
4341              * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4342              */
4343             clear_past_failure = true;
4344         }
4345         history->rsc->priv->orig_role = pcmk_role_unpromoted;
4346 
4347     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4348                             pcmk__str_none)) {
4349         history->rsc->priv->orig_role = pcmk_role_started;
4350         clear_past_failure = true;
4351 
4352     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4353                             pcmk__str_none)) {
4354         unpack_migrate_to_success(history);
4355 
4356     } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4357         pcmk__rsc_trace(history->rsc, "%s active on %s",
4358                         history->rsc->id, pcmk__node_name(history->node));
4359         set_active(history->rsc);
4360     }
4361 
4362     if (!clear_past_failure) {
4363         return;
4364     }
4365 
4366     switch (*on_fail) {
4367         case pcmk__on_fail_stop:
4368         case pcmk__on_fail_ban:
4369         case pcmk__on_fail_standby_node:
4370         case pcmk__on_fail_fence_node:
4371             pcmk__rsc_trace(history->rsc,
4372                             "%s (%s) is not cleared by a completed %s",
4373                             history->rsc->id, pcmk__on_fail_text(*on_fail),
4374                             history->task);
4375             break;
4376 
4377         case pcmk__on_fail_block:
4378         case pcmk__on_fail_ignore:
4379         case pcmk__on_fail_demote:
4380         case pcmk__on_fail_restart:
4381         case pcmk__on_fail_restart_container:
4382             *on_fail = pcmk__on_fail_ignore;
4383             pe__set_next_role(history->rsc, pcmk_role_unknown,
4384                               "clear past failures");
4385             break;
4386 
4387         case pcmk__on_fail_reset_remote:
4388             if (history->rsc->priv->remote_reconnect_ms == 0U) {
4389                 /* With no reconnect interval, the connection is allowed to
4390                  * start again after the remote node is fenced and
4391                  * completely stopped. (With a reconnect interval, we wait
4392                  * for the failure to be cleared entirely before attempting
4393                  * to reconnect.)
4394                  */
4395                 *on_fail = pcmk__on_fail_ignore;
4396                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4397                                   "clear past failures and reset remote");
4398             }
4399             break;
4400     }
4401 }
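
/* Worked example (added commentary, not part of the original source): a
 * successful PCMK_ACTION_START sets the resource's known role to started and
 * clears past failures; unless the current failure handling is one of the
 * policies that must persist (stop, ban, standby, fence), on_fail is then
 * reset to pcmk__on_fail_ignore and the next role is cleared
 * ("clear past failures").
 */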
4402 
4403 /*!
4404  * \internal
4405  * \brief Check whether a given history entry matters for resource state
4406  *
4407  * \param[in] history  Parsed action history entry
4408  *
4409  * \return true if action can affect resource state, otherwise false
4410  */
4411 static inline bool
4412 can_affect_state(struct action_history *history)
4413 {
4414     return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4415                             PCMK_ACTION_START, PCMK_ACTION_STOP,
4416                             PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4417                             PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4418                             "asyncmon", NULL);
4419 }
4420 
4421 /*!
4422  * \internal
4423  * \brief Unpack execution/exit status and exit reason from a history entry
4424  *
4425  * \param[in,out] history  Action history entry to unpack
4426  *
4427  * \return Standard Pacemaker return code
4428  */
4429 static int
4430 unpack_action_result(struct action_history *history)
4431 {
4432     if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4433                                &(history->execution_status)) < 0)
4434         || (history->execution_status < PCMK_EXEC_PENDING)
4435         || (history->execution_status > PCMK_EXEC_MAX)
4436         || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4437         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4438                          "with invalid " PCMK__XA_OP_STATUS " '%s'",
4439                          history->id, history->rsc->id,
4440                          pcmk__node_name(history->node),
4441                          pcmk__s(crm_element_value(history->xml,
4442                                                    PCMK__XA_OP_STATUS),
4443                                  ""));
4444         return pcmk_rc_unpack_error;
4445     }
4446     if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4447                                &(history->exit_status)) < 0)
4448         || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4449         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4450                          "with invalid " PCMK__XA_RC_CODE " '%s'",
4451                          history->id, history->rsc->id,
4452                          pcmk__node_name(history->node),
4453                          pcmk__s(crm_element_value(history->xml,
4454                                                    PCMK__XA_RC_CODE),
4455                                  ""));
4456         return pcmk_rc_unpack_error;
4457     }
4458     history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4459     return pcmk_rc_ok;
4460 }
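
/* Illustrative usage (added commentary, not part of the original source):
 * unpack_rsc_op() below bails out early when a history entry's recorded
 * result cannot be validated:
 *
 *     if (unpack_action_result(&history) != pcmk_rc_ok) {
 *         return; // Error already logged
 *     }
 */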
4461 
4462 /*!
4463  * \internal
4464  * \brief Process an action history entry whose result expired
4465  *
4466  * \param[in,out] history           Parsed action history entry
4467  * \param[in]     orig_exit_status  Action exit status before remapping
4468  *
4469  * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4470  *         entry needs no further processing)
4471  */
4472 static int
4473 process_expired_result(struct action_history *history, int orig_exit_status)
4474 {
4475     if (!pcmk__is_bundled(history->rsc)
4476         && pcmk_xe_mask_probe_failure(history->xml)
4477         && (orig_exit_status != history->expected_exit_status)) {
4478 
4479         if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4480             history->rsc->priv->orig_role = pcmk_role_unknown;
4481         }
4482         crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4483                   "Masked failure expired",
4484                   history->id, history->rsc->id,
4485                   pcmk__node_name(history->node));
4486         return pcmk_rc_ok;
4487     }
4488 
4489     if (history->exit_status == history->expected_exit_status) {
4490         return pcmk_rc_undetermined; // Only failures expire
4491     }
4492 
4493     if (history->interval_ms == 0) {
4494         crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4495                    "Expired failure",
4496                    history->id, history->task, history->rsc->id,
4497                    pcmk__node_name(history->node));
4498         return pcmk_rc_ok;
4499     }
4500 
4501     if (history->node->details->online && !history->node->details->unclean) {
4502         /* Reschedule the recurring action. schedule_cancel() won't work at
4503          * this stage, so as a hacky workaround, forcibly change the restart
4504          * digest so pcmk__check_action_config() does what we want later.
4505          *
4506          * @TODO We should skip this if there is a newer successful monitor.
4507          *       Also, this causes rescheduling only if the history entry
4508          *       has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4509          *       scheduler regression test doesn't, but that may not be a
4510          *       realistic scenario in production).
4511          */
4512         crm_notice("Rescheduling %s-interval %s of %s on %s "
4513                    "after failure expired",
4514                    pcmk__readable_interval(history->interval_ms), history->task,
4515                    history->rsc->id, pcmk__node_name(history->node));
4516         crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4517                     "calculated-failure-timeout");
4518         return pcmk_rc_ok;
4519     }
4520 
4521     return pcmk_rc_undetermined;
4522 }
4523 
4524 /*!
4525  * \internal
4526  * \brief Process a masked probe failure
4527  *
4528  * \param[in,out] history           Parsed action history entry
4529  * \param[in]     orig_exit_status  Action exit status before remapping
4530  * \param[in]     last_failure      Resource's last_failure entry, if known
4531  * \param[in,out] on_fail           Resource's current failure handling
4532  */
4533 static void
4534 mask_probe_failure(struct action_history *history, int orig_exit_status,
4535                    const xmlNode *last_failure,
4536                    enum pcmk__on_fail *on_fail)
4537 {
4538     pcmk_resource_t *ban_rsc = history->rsc;
4539 
4540     if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) {
4541         ban_rsc = uber_parent(history->rsc);
4542     }
4543 
4544     crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4545                crm_exit_str(orig_exit_status), history->rsc->id,
4546                pcmk__node_name(history->node));
4547     update_resource_state(history, history->expected_exit_status, last_failure,
4548                           on_fail);
4549     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4550 
4551     record_failed_op(history);
4552     resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4553                       "masked-probe-failure", ban_rsc->priv->scheduler);
4554 }
4555 
4556 /*!
4557  * \internal
4558  * \brief Check whether a given failure is for a given pending action
4559  * \param[in] history       Parsed history entry for pending action
4560  * \param[in] last_failure  Resource's last_failure entry, if known
4561  *
4562  * \return true if \p last_failure is failure of pending action in \p history,
4563  *         otherwise false
4564  * \note Both \p history and \p last_failure must come from the same
4565  *       \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4566  *       the same.
4567  */
4568 static bool
4569 failure_is_newer(const struct action_history *history,
4570                  const xmlNode *last_failure)
4571 {
4572     guint failure_interval_ms = 0U;
4573     long long failure_change = 0LL;
4574     long long this_change = 0LL;
4575 
4576     if (last_failure == NULL) {
4577         return false; // Resource has no last_failure entry
4578     }
4579 
4580     if (!pcmk__str_eq(history->task,
4581                       crm_element_value(last_failure, PCMK_XA_OPERATION),
4582                       pcmk__str_none)) {
4583         return false; // last_failure is for different action
4584     }
4585 
4586     if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4587                               &failure_interval_ms) != pcmk_ok)
4588         || (history->interval_ms != failure_interval_ms)) {
4589         return false; // last_failure is for action with different interval
4590     }
4591 
4592     if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
4593                        &this_change, 0LL) != pcmk_rc_ok)
4594         || (pcmk__scan_ll(crm_element_value(last_failure,
4595                                             PCMK_XA_LAST_RC_CHANGE),
4596                           &failure_change, 0LL) != pcmk_rc_ok)
4597         || (failure_change < this_change)) {
4598         return false; // Failure is not known to be newer
4599     }
4600 
4601     return true;
4602 }
4603 
4604 /*!
4605  * \internal
4606  * \brief Update a resource's role etc. for a pending action
4607  *
4608  * \param[in,out] history       Parsed history entry for pending action
4609  * \param[in]     last_failure  Resource's last_failure entry, if known
4610  */
4611 static void
4612 process_pending_action(struct action_history *history,
4613                        const xmlNode *last_failure)
4614 {
4615     /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4616      * and there might be a RSC_monitor_INTERVAL entry with the last successful
4617      * or pending result.
4618      *
4619      * If last_failure contains the failure of the pending recurring monitor
4620      * we're processing here, and is newer, the action is no longer pending.
4621      * (Pending results have call ID -1, which sorts last, so the last failure
4622      * if any should be known.)
4623      */
4624     if (failure_is_newer(history, last_failure)) {
4625         return;
4626     }
4627 
4628     if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4629         pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4630         set_active(history->rsc);
4631 
4632     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4633         history->rsc->priv->orig_role = pcmk_role_promoted;
4634 
4635     } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4636                && history->node->details->unclean) {
4637         /* A migrate_to action is pending on an unclean source, so force a stop
4638          * on the target.
4639          */
4640         const char *migrate_target = NULL;
4641         pcmk_node_t *target = NULL;
4642 
4643         migrate_target = crm_element_value(history->xml,
4644                                            PCMK__META_MIGRATE_TARGET);
4645         target = pcmk_find_node(history->rsc->priv->scheduler,
4646                                 migrate_target);
4647         if (target != NULL) {
4648             stop_action(history->rsc, target, FALSE);
4649         }
4650     }
4651 
4652     if (history->rsc->priv->pending_action != NULL) {
4653         /* There should never be multiple pending actions, but as a failsafe,
4654          * just remember the first one processed for display purposes.
4655          */
4656         return;
4657     }
4658 
4659     if (pcmk_is_probe(history->task, history->interval_ms)) {
4660         /* Pending probes are currently never displayed, even if pending
4661          * operations are requested. If we ever want to change that,
4662          * enable the below and the corresponding part of
4663          * native.c:native_pending_action().
4664          */
4665 #if 0
4666         history->rsc->priv->pending_action = strdup("probe");
4667         history->rsc->priv->pending_node = history->node;
4668 #endif
4669     } else {
4670         history->rsc->priv->pending_action = strdup(history->task);
4671         history->rsc->priv->pending_node = history->node;
4672     }
4673 }
4674 
4675 static void
4676 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4677               xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4678 {
4679     int old_rc = 0;
4680     bool expired = false;
4681     pcmk_resource_t *parent = rsc;
4682     enum rsc_role_e fail_role = pcmk_role_unknown;
4683     enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4684 
4685     struct action_history history = {
4686         .rsc = rsc,
4687         .node = node,
4688         .xml = xml_op,
4689         .execution_status = PCMK_EXEC_UNKNOWN,
4690     };
4691 
4692     CRM_CHECK(rsc && node && xml_op, return);
4693 
4694     history.id = pcmk__xe_id(xml_op);
4695     if (history.id == NULL) {
4696         pcmk__config_err("Ignoring resource history entry for %s on %s "
4697                          "without ID", rsc->id, pcmk__node_name(node));
4698         return;
4699     }
4700 
4701     // Task and interval
4702     history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4703     if (history.task == NULL) {
4704         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4705                          "without " PCMK_XA_OPERATION,
4706                          history.id, rsc->id, pcmk__node_name(node));
4707         return;
4708     }
4709     crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4710     if (!can_affect_state(&history)) {
4711         pcmk__rsc_trace(rsc,
4712                         "Ignoring resource history entry %s for %s on %s "
4713                         "with irrelevant action '%s'",
4714                         history.id, rsc->id, pcmk__node_name(node),
4715                         history.task);
4716         return;
4717     }
4718 
4719     if (unpack_action_result(&history) != pcmk_rc_ok) {
4720         return; // Error already logged
4721     }
4722 
4723     history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4724     history.key = pcmk__xe_history_key(xml_op);
4725     crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4726 
4727     pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4728                     history.id, history.task, history.call_id,
4729                     pcmk__node_name(node),
4730                     pcmk_exec_status_str(history.execution_status),
4731                     crm_exit_str(history.exit_status));
4732 
4733     if (node->details->unclean) {
4734         pcmk__rsc_trace(rsc,
4735                         "%s is running on %s, which is unclean (further action "
4736                         "depends on value of stop's on-fail attribute)",
4737                         rsc->id, pcmk__node_name(node));
4738     }
4739 
4740     expired = check_operation_expiry(&history);
4741     old_rc = history.exit_status;
4742 
4743     remap_operation(&history, on_fail, expired);
4744 
4745     if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4746         goto done;
4747     }
4748 
4749     if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4750         mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4751         goto done;
4752     }
4753 
4754     if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
4755         parent = uber_parent(rsc);
4756     }
4757 
4758     switch (history.execution_status) {
4759         case PCMK_EXEC_PENDING:
4760             process_pending_action(&history, *last_failure);
4761             goto done;
4762 
4763         case PCMK_EXEC_DONE:
4764             update_resource_state(&history, history.exit_status, *last_failure,
4765                                   on_fail);
4766             goto done;
4767 
4768         case PCMK_EXEC_NOT_INSTALLED:
4769             unpack_failure_handling(&history, &failure_strategy, &fail_role);
4770             if (failure_strategy == pcmk__on_fail_ignore) {
4771                 crm_warn("Cannot ignore failed %s of %s on %s: "
4772                          "Resource agent doesn't exist "
4773                          QB_XS " status=%d rc=%d id=%s",
4774                          history.task, rsc->id, pcmk__node_name(node),
4775                          history.execution_status, history.exit_status,
4776                          history.id);
4777                 /* Also for printing it as "FAILED" by marking it as
4778                  * pcmk__rsc_failed later
4779                  */
4780                 *on_fail = pcmk__on_fail_ban;
4781             }
4782             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4783                               "hard-error", rsc->priv->scheduler);
4784             unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4785                                   last_failure, on_fail);
4786             goto done;
4787 
4788         case PCMK_EXEC_NOT_CONNECTED:
4789             if (pcmk__is_pacemaker_remote_node(node)
4790                 && pcmk_is_set(node->priv->remote->flags,
4791                                pcmk__rsc_managed)) {
4792                 /* We should never get into a situation where a managed remote
4793                  * connection resource is considered OK but a resource action
4794                  * behind the connection gets a "not connected" status. But as a
4795                  * fail-safe in case a bug or unusual circumstances do lead to
4796                  * that, ensure the remote connection is considered failed.
4797                  */
4798                 pcmk__set_rsc_flags(node->priv->remote,
4799                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
4800             }
4801             break; // Not done, do error handling
4802 
4803         case PCMK_EXEC_ERROR:
4804         case PCMK_EXEC_ERROR_HARD:
4805         case PCMK_EXEC_ERROR_FATAL:
4806         case PCMK_EXEC_TIMEOUT:
4807         case PCMK_EXEC_NOT_SUPPORTED:
4808         case PCMK_EXEC_INVALID:
4809             break; // Not done, do error handling
4810 
4811         default: // No other value should be possible at this point
4812             break;
4813     }
4814 
4815     unpack_failure_handling(&history, &failure_strategy, &fail_role);
4816     if ((failure_strategy == pcmk__on_fail_ignore)
4817         || ((failure_strategy == pcmk__on_fail_restart_container)
4818             && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4819 
4820         char *last_change_s = last_change_str(xml_op);
4821 
4822         crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4823                  QB_XS " %s",
4824                  history.task, crm_exit_str(history.exit_status),
4825                  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4826                  pcmk__s(history.exit_reason, ""), rsc->id,
4827                  pcmk__node_name(node), last_change_s, history.id);
4828         free(last_change_s);
4829 
4830         update_resource_state(&history, history.expected_exit_status,
4831                               *last_failure, on_fail);
4832         crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name);
4833         pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);
4834 
4835         record_failed_op(&history);
4836 
4837         if ((failure_strategy == pcmk__on_fail_restart_container)
4838             && (*on_fail <= pcmk__on_fail_restart)) {
4839             *on_fail = failure_strategy;
4840         }
4841 
4842     } else {
4843         unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4844                               last_failure, on_fail);
4845 
4846         if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4847             uint8_t log_level = LOG_ERR;
4848 
4849             if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4850                 log_level = LOG_NOTICE;
4851             }
4852             do_crm_log(log_level,
4853                        "Preventing %s from restarting on %s because "
4854                        "of hard failure (%s%s%s) " QB_XS " %s",
4855                        parent->id, pcmk__node_name(node),
4856                        crm_exit_str(history.exit_status),
4857                        (pcmk__str_empty(history.exit_reason)? "" : ": "),
4858                        pcmk__s(history.exit_reason, ""), history.id);
4859             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4860                               "hard-error", rsc->priv->scheduler);
4861 
4862         } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4863             pcmk__sched_err(rsc->priv->scheduler,
4864                             "Preventing %s from restarting anywhere because "
4865                             "of fatal failure (%s%s%s) " QB_XS " %s",
4866                             parent->id, crm_exit_str(history.exit_status),
4867                             (pcmk__str_empty(history.exit_reason)? "" : ": "),
4868                             pcmk__s(history.exit_reason, ""), history.id);
4869             resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
4870                               "fatal-error", rsc->priv->scheduler);
4871         }
4872     }
4873 
4874 done:
4875     pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4876                     rsc->id, pcmk__node_name(node), history.id,
4877                     pcmk_role_text(rsc->priv->orig_role),
4878                     pcmk_role_text(rsc->priv->next_role));
4879 }
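
/* Illustrative sketch (added commentary, not part of the original source):
 * the per-resource unpacking code earlier in this file drives unpack_rsc_op()
 * over a resource's operation history sorted by call ID, threading
 * last_failure and on_fail through successive entries. Roughly (local names
 * here are hypothetical):
 *
 *     xmlNode *last_failure = NULL;
 *     enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
 *
 *     for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
 *         unpack_rsc_op(rsc, node, (xmlNode *) iter->data,
 *                       &last_failure, &on_fail);
 *     }
 */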
4880 
4881 /*!
4882  * \internal
4883  * \brief Insert a node attribute with value into a \c GHashTable
4884  *
4885  * \param[in,out] key        Key to insert (either freed or owned by
4886  *                           \p user_data upon return)
4887  * \param[in]     value      Value to insert (owned by \p user_data upon return)
4888  * \param[in]     user_data  \c GHashTable to insert into
4889  */
4890 static gboolean
4891 insert_attr(gpointer key, gpointer value, gpointer user_data)
4892 {
4893     GHashTable *table = user_data;
4894 
4895     g_hash_table_insert(table, key, value);
4896     return TRUE;
4897 }
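
/* Added commentary (not part of the original source): insert_attr() is used
 * with g_hash_table_foreach_steal() in add_node_attrs() below. Returning
 * TRUE tells GLib to remove each entry from the source table without calling
 * its destroy functions, so ownership of the key and value simply transfers
 * to the destination table and nothing is double-freed.
 */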
4898 
4899 static void
4900 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4901                pcmk_scheduler_t *scheduler)
4902 {
4903     const char *cluster_name = NULL;
4904     const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID);
4905 
4906     pe_rule_eval_data_t rule_data = {
4907         .node_hash = NULL,
4908         .now = scheduler->priv->now,
4909         .match_data = NULL,
4910         .rsc_data = NULL,
4911         .op_data = NULL
4912     };
4913 
4914     pcmk__insert_dup(node->priv->attrs,
4915                      CRM_ATTR_UNAME, node->priv->name);
4916 
4917     pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);
4918 
4919     if ((scheduler->dc_node == NULL)
4920         && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {
4921 
4922         scheduler->dc_node = node;
4923         pcmk__insert_dup(node->priv->attrs,
4924                          CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
4925 
4926     } else if (!pcmk__same_node(node, scheduler->dc_node)) {
4927         pcmk__insert_dup(node->priv->attrs,
4928                          CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
4929     }
4930 
4931     cluster_name = g_hash_table_lookup(scheduler->priv->options,
4932                                        PCMK_OPT_CLUSTER_NAME);
4933     if (cluster_name) {
4934         pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
4935                          cluster_name);
4936     }
4937 
4938     if (overwrite) {
4939         /* @TODO Try to reorder some unpacking so that we don't need the
4940          * overwrite argument or to unpack into a temporary table
4941          */
4942         GHashTable *unpacked = pcmk__strkey_table(free, free);
4943 
4944         pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
4945                                    &rule_data, unpacked, NULL, scheduler);
4946         g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
4947         g_hash_table_destroy(unpacked);
4948 
4949     } else {
4950         pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
4951                                    &rule_data, node->priv->attrs, NULL,
4952                                    scheduler);
4953     }
4954 
4955     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
4956                                node->priv->utilization, NULL, scheduler);
4957 
4958     if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
4959                         pcmk__rsc_node_current) == NULL) {
4960         const char *site_name = pcmk__node_attr(node, "site-name", NULL,
4961                                                 pcmk__rsc_node_current);
4962 
4963         if (site_name) {
4964             pcmk__insert_dup(node->priv->attrs,
4965                              CRM_ATTR_SITE_NAME, site_name);
4966 
4967         } else if (cluster_name) {
4968             /* Default to cluster-name if unset */
4969             pcmk__insert_dup(node->priv->attrs,
4970                              CRM_ATTR_SITE_NAME, cluster_name);
4971         }
4972     }
4973 }
4974 
4975 static GList *
4976 extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter)
4977 {
4978     int counter = -1;
4979     int stop_index = -1;
4980     int start_index = -1;
4981 
4982     xmlNode *rsc_op = NULL;
4983 
4984     GList *gIter = NULL;
4985     GList *op_list = NULL;
4986     GList *sorted_op_list = NULL;
4987 
4988     /* extract operations */
4989     op_list = NULL;
4990     sorted_op_list = NULL;
4991 
4992     for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
4993                                        NULL);
4994          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
4995 
4996         crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
4997         crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
4998         op_list = g_list_prepend(op_list, rsc_op);
4999     }
5000 
5001     if (op_list == NULL) {
5002         /* if there are no operations, there is nothing to do */
5003         return NULL;
5004     }
5005 
5006     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5007 
5008     /* create active recurring operations as optional */
5009     if (active_filter == FALSE) {
5010         return sorted_op_list;
5011     }
5012 
5013     op_list = NULL;
5014 
5015     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5016 
5017     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5018         xmlNode *rsc_op = (xmlNode *) gIter->data;
5019 
5020         counter++;
5021 
5022         if (start_index < stop_index) {
5023             crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5024             break;
5025 
5026         } else if (counter < start_index) {
5027             crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5028             continue;
5029         }
5030         op_list = g_list_append(op_list, rsc_op);
5031     }
5032 
5033     g_list_free(sorted_op_list);
5034     return op_list;
5035 }
5036 
5037 GList *
5038 find_operations(const char *rsc, const char *node, gboolean active_filter,
5039                 pcmk_scheduler_t *scheduler)
5040 {
5041     GList *output = NULL;
5042     GList *intermediate = NULL;
5043 
5044     xmlNode *tmp = NULL;
5045     xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5046                                            NULL, NULL);
5047 
5048     pcmk_node_t *this_node = NULL;
5049 
5050     xmlNode *node_state = NULL;
5051 
5052     CRM_CHECK(status != NULL, return NULL);
5053 
5054     for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
5055                                            NULL);
5056          node_state != NULL;
5057          node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {
5058 
5059         const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5060 
5061         if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5062             continue;
5063         }
5064 
5065         this_node = pcmk_find_node(scheduler, uname);
5066         if (this_node == NULL) {
5067             CRM_LOG_ASSERT(this_node != NULL);
5068             continue;
5069 
5070         } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5071             determine_remote_online_status(scheduler, this_node);
5072 
5073         } else {
5074             determine_online_status(node_state, this_node, scheduler);
5075         }
5076 
5077         if (this_node->details->online
5078             || pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
5079             /* Offline nodes run no resources, unless fencing is enabled,
5080              * in which case we need to make sure resource start events
5081              * are ordered after the fencing
5082              */
5083             xmlNode *lrm_rsc = NULL;
5084 
5085             tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5086                                        NULL);
5087             tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
5088                                        NULL);
5089 
5090             for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
5091                                                 NULL, NULL);
5092                  lrm_rsc != NULL;
5093                  lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5094 
5095                 const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID);
5096 
5097                 if ((rsc != NULL)
5098                     && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
5099                     continue;
5100                 }
5101 
5102                 intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5103                 output = g_list_concat(output, intermediate);
5104             }
5105         }
5106     }
5107 
5108     return output;
5109 }
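
/* Illustrative usage (added commentary, not part of the original source;
 * hypothetical caller):
 *
 *     // All active operation entries for every resource on every node
 *     GList *ops = find_operations(NULL, NULL, TRUE, scheduler);
 *
 *     // The list owns no XML; entries still belong to scheduler->input
 *     g_list_free(ops);
 */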
