root/lib/pengine/unpack.c


DEFINITIONS

This source file includes the following definitions.
  1. is_dangling_guest_node
  2. pe_fence_node
  3. set_if_xpath
  4. unpack_config
  5. pe_create_node
  6. expand_remote_rsc_meta
  7. handle_startup_fencing
  8. unpack_nodes
  9. unpack_launcher
  10. unpack_remote_nodes
  11. link_rsc2remotenode
  12. unpack_resources
  13. pcmk__validate_fencing_topology
  14. unpack_tags
  15. unpack_ticket_state
  16. unpack_handle_remote_attrs
  17. unpack_transient_attributes
  18. unpack_node_state
  19. unpack_node_history
  20. unpack_status
  21. unpack_node_member
  22. unpack_node_online
  23. unpack_node_terminate
  24. determine_online_status_no_fencing
  25. pending_too_long
  26. determine_online_status_fencing
  27. determine_remote_online_status
  28. determine_online_status
  29. pe_base_name_end
  30. clone_strip
  31. clone_zero
  32. create_fake_resource
  33. create_anonymous_orphan
  34. find_anonymous_clone
  35. unpack_find_resource
  36. process_orphan_resource
  37. process_rsc_state
  38. process_recurring
  39. calculate_active_ops
  40. unpack_shutdown_lock
  41. unpack_lrm_resource
  42. handle_removed_launched_resources
  43. unpack_node_lrm
  44. set_active
  45. set_node_score
  46. find_lrm_op
  47. find_lrm_resource
  48. unknown_on_node
  49. monitor_not_running_after
  50. non_monitor_after
  51. newer_state_after_migrate
  52. get_migration_node_names
  53. add_dangling_migration
  54. unpack_migrate_to_success
  55. unpack_migrate_to_failure
  56. unpack_migrate_from_failure
  57. record_failed_op
  58. last_change_str
  59. ban_from_all_nodes
  60. unpack_failure_handling
  61. unpack_rsc_op_failure
  62. block_if_unrecoverable
  63. remap_because
  64. remap_operation
  65. should_clear_for_param_change
  66. order_after_remote_fencing
  67. should_ignore_failure_timeout
  68. check_operation_expiry
  69. pe__target_rc_from_xml
  70. update_resource_state
  71. can_affect_state
  72. unpack_action_result
  73. process_expired_result
  74. mask_probe_failure
  75. failure_is_newer
  76. process_pending_action
  77. unpack_rsc_op
  78. insert_attr
  79. add_node_attrs
  80. extract_operations
  81. find_operations

   1 /*
   2  * Copyright 2004-2025 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdio.h>
  13 #include <string.h>
  14 #include <time.h>
  15 
  16 #include <glib.h>
  17 #include <libxml/tree.h>                // xmlNode
  18 #include <libxml/xpath.h>               // xmlXPathObject, etc.
  19 
  20 #include <crm/crm.h>
  21 #include <crm/services.h>
  22 #include <crm/common/xml.h>
  23 #include <crm/common/xml_internal.h>
  24 
  25 #include <crm/common/util.h>
  26 #include <crm/pengine/internal.h>
  27 #include <pe_status_private.h>
  28 
  29 CRM_TRACE_INIT_DATA(pe_status);
  30 
  31 // A (parsed) resource action history entry
  32 struct action_history {
  33     pcmk_resource_t *rsc;       // Resource that history is for
  34     pcmk_node_t *node;        // Node that history is for
  35     xmlNode *xml;             // History entry XML
  36 
  37     // Parsed from entry XML
  38     const char *id;           // XML ID of history entry
  39     const char *key;          // Operation key of action
  40     const char *task;         // Action name
  41     const char *exit_reason;  // Exit reason given for result
  42     guint interval_ms;        // Action interval
  43     int call_id;              // Call ID of action
  44     int expected_exit_status; // Expected exit status of action
  45     int exit_status;          // Actual exit status of action
  46     int execution_status;     // Execution status of action
  47 };
  48 
  49 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
  50  * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
  51  * flag is stringified more readably in log messages.
  52  */
  53 #define set_config_flag(scheduler, option, flag) do {                         \
  54         GHashTable *config_hash = (scheduler)->priv->options;                 \
  55         const char *scf_value = pcmk__cluster_option(config_hash, (option));  \
  56                                                                               \
  57         if (scf_value != NULL) {                                              \
  58             if (crm_is_true(scf_value)) {                                     \
  59                 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,   \
  60                                     LOG_TRACE, "Scheduler",                   \
  61                                     crm_system_name, (scheduler)->flags,      \
  62                                     (flag), #flag);                           \
  63             } else {                                                          \
  64                 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
  65                                     LOG_TRACE, "Scheduler",                   \
  66                                     crm_system_name, (scheduler)->flags,      \
  67                                     (flag), #flag);                           \
  68             }                                                                 \
  69         }                                                                     \
  70     } while(0)
  71 
  72 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
  73                           xmlNode *xml_op, xmlNode **last_failure,
  74                           enum pcmk__on_fail *failed);
  75 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
  76                                            pcmk_node_t *this_node);
  77 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
  78                            bool overwrite, pcmk_scheduler_t *scheduler);
  79 static void determine_online_status(const xmlNode *node_state,
  80                                     pcmk_node_t *this_node,
  81                                     pcmk_scheduler_t *scheduler);
  82 
  83 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
  84                             pcmk_scheduler_t *scheduler);
  85 
  86 
  87 /*!
  88  * \internal
  89  * \brief Check whether a node is a dangling guest node
  90  *
  91  * \param[in] node  Node to check
  92  *
  93  * \return true if \p node had a Pacemaker Remote connection resource with a
  94  *         launcher that was removed from the CIB, otherwise false.
  95  */
  96 static bool
  97 is_dangling_guest_node(pcmk_node_t *node)
  98 {
  99     return pcmk__is_pacemaker_remote_node(node)
 100            && (node->priv->remote != NULL)
 101            && (node->priv->remote->priv->launcher == NULL)
 102            && pcmk_is_set(node->priv->remote->flags,
 103                           pcmk__rsc_removed_launched);
 104 }
 105 
 106 /*!
 107  * \brief Schedule a fence action for a node
 108  *
 109  * \param[in,out] scheduler       Scheduler data
 110  * \param[in,out] node            Node to fence
 111  * \param[in]     reason          Text description of why fencing is needed
 112  * \param[in]     priority_delay  Whether to consider
 113  *                                \c PCMK_OPT_PRIORITY_FENCING_DELAY
 114  */
 115 void
 116 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
 117               const char *reason, bool priority_delay)
 118 {
 119     CRM_CHECK(node, return);
 120 
 121     if (pcmk__is_guest_or_bundle_node(node)) {
 122         // Fence a guest or bundle node by marking its launcher as failed
 123         pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
 124 
 125         if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
 126             if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
 127                 crm_notice("Not fencing guest node %s "
 128                            "(otherwise would because %s): "
 129                            "its guest resource %s is unmanaged",
 130                            pcmk__node_name(node), reason, rsc->id);
 131             } else {
 132                 pcmk__sched_warn(scheduler,
 133                                  "Guest node %s will be fenced "
 134                                  "(by recovering its guest resource %s): %s",
 135                                  pcmk__node_name(node), rsc->id, reason);
 136 
 137                 /* We don't mark the node as unclean because that would prevent the
 138                  * node from running resources. We want to allow it to run resources
 139                  * in this transition if the recovery succeeds.
 140                  */
 141                 pcmk__set_node_flags(node, pcmk__node_remote_reset);
 142                 pcmk__set_rsc_flags(rsc,
 143                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
 144             }
 145         }
 146 
 147     } else if (is_dangling_guest_node(node)) {
 148         crm_info("Cleaning up dangling connection for guest node %s: "
 149                  "fencing was already done because %s, "
 150                  "and guest resource no longer exists",
 151                  pcmk__node_name(node), reason);
 152         pcmk__set_rsc_flags(node->priv->remote,
 153                             pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
 154 
 155     } else if (pcmk__is_remote_node(node)) {
 156         pcmk_resource_t *rsc = node->priv->remote;
 157 
 158         if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
 159             crm_notice("Not fencing remote node %s "
 160                        "(otherwise would because %s): connection is unmanaged",
 161                        pcmk__node_name(node), reason);
 162         } else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) {
 163             pcmk__set_node_flags(node, pcmk__node_remote_reset);
 164             pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
 165                              pcmk__node_name(node),
 166                              pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 167                              reason);
 168         }
 169         node->details->unclean = TRUE;
 170         // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
 171         pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
 172 
 173     } else if (node->details->unclean) {
 174         crm_trace("Cluster node %s %s because %s",
 175                   pcmk__node_name(node),
 176                   pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
 177                   reason);
 178 
 179     } else {
 180         pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
 181                          pcmk__node_name(node),
 182                          pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 183                          reason);
 184         node->details->unclean = TRUE;
 185         pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
 186     }
 187 }
 188 
 189 // @TODO xpaths can't handle templates, rules, or id-refs
 190 
 191 // nvpair with provides or requires set to unfencing
 192 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR           \
 193     "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'"   \
 194     "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "   \
 195     "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
 196 
 197 // unfencing in rsc_defaults or any resource
 198 #define XPATH_ENABLE_UNFENCING \
 199     "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
 200     "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR             \
 201     "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
 202     "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
 203 
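      /*!
       * \internal
       * \brief Set a scheduler flag if an XPath search of the CIB has results
       *
       * \param[in]     flag       Scheduler flag to set
       * \param[in]     xpath      XPath to search for in the input CIB
       * \param[in,out] scheduler  Scheduler data
       */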
 204 static void
 205 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
 206 {
 207     xmlXPathObject *result = NULL;
 208 
 209     if (!pcmk_is_set(scheduler->flags, flag)) {
 210         result = pcmk__xpath_search(scheduler->input->doc, xpath);
 211         if (pcmk__xpath_num_results(result) > 0) {
 212             pcmk__set_scheduler_flags(scheduler, flag);
 213         }
 214         xmlXPathFreeObject(result);
 215     }
 216 }
 217 
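      /*!
       * \internal
       * \brief Unpack cluster options from the CIB configuration section
       *
       * \param[in,out] config     Configuration XML with cluster property sets
       * \param[in,out] scheduler  Scheduler data
       *
       * \return TRUE (this function does not currently fail)
       */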
 218 gboolean
 219 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
 220 {
 221     const char *value = NULL;
 222     GHashTable *config_hash = pcmk__strkey_table(free, free);
 223 
 224     const pcmk_rule_input_t rule_input = {
 225         .now = scheduler->priv->now,
 226     };
 227 
 228     scheduler->priv->options = config_hash;
 229 
 230     pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET,
 231                                &rule_input, config_hash,
 232                                PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, scheduler);
 233 
 234     pcmk__validate_cluster_options(config_hash);
 235 
 236     set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
 237                     pcmk__sched_probe_resources);
 238     if (!pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) {
 239         crm_info("Startup probes: disabled (dangerous)");
 240     }
 241 
 242     value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
 243     if (value && crm_is_true(value)) {
 244         crm_info("Watchdog-based self-fencing will be performed via SBD if "
 245                  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
 246                  " is nonzero");
 247         pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing);
 248     }
 249 
 250     /* Set certain flags via xpath here, so they can be used before the relevant
 251      * configuration sections are unpacked.
 252      */
 253     set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
 254                  scheduler);
 255 
 256     value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
 257     pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms));
 258 
 259     crm_debug("Default fencing action timeout: %s",
 260               pcmk__readable_interval(scheduler->priv->fence_timeout_ms));
 261 
 262     set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
 263                     pcmk__sched_fencing_enabled);
 264     if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
 265         crm_debug("STONITH of failed nodes is enabled");
 266     } else {
 267         crm_debug("STONITH of failed nodes is disabled");
 268     }
 269 
 270     scheduler->priv->fence_action =
 271         pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_ACTION);
 272     crm_trace("STONITH will %s nodes", scheduler->priv->fence_action);
 273 
 274     set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
 275                     pcmk__sched_concurrent_fencing);
 276     if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) {
 277         crm_debug("Concurrent fencing is enabled");
 278     } else {
 279         crm_debug("Concurrent fencing is disabled");
 280     }
 281 
 282     value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
 283     if (value) {
 284         pcmk_parse_interval_spec(value,
 285                                  &(scheduler->priv->priority_fencing_ms));
 286         crm_trace("Priority fencing delay is %s",
 287                   pcmk__readable_interval(scheduler->priv->priority_fencing_ms));
 288     }
 289 
 290     set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
 291                     pcmk__sched_stop_all);
 292     crm_debug("Stop all active resources: %s",
 293               pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
 294 
 295     set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
 296                     pcmk__sched_symmetric_cluster);
 297     if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) {
 298         crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
 299     }
 300 
 301     value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
 302 
 303     if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
 304         scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
 305 
 306     } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
 307         scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
 308 
 309     } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
 310         scheduler->no_quorum_policy = pcmk_no_quorum_demote;
 311 
 312     } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
 313                                     PCMK_VALUE_FENCE_LEGACY, NULL)) {
 314         if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
 315             int do_panic = 0;
 316 
 317             crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
 318                                   &do_panic);
 319             if (do_panic
 320                 || pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) {
 321                 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
 322             } else {
 323                 crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 324                            " to 'stop': cluster has never had quorum");
 325                 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 326             }
 327         } else {
 328             pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 329                              " to 'stop' because fencing is disabled");
 330             scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 331         }
 332 
 333     } else {
 334         scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 335     }
 336 
 337     switch (scheduler->no_quorum_policy) {
 338         case pcmk_no_quorum_freeze:
 339             crm_debug("On loss of quorum: "
 340                       "Freeze resources that require quorum");
 341             break;
 342         case pcmk_no_quorum_stop:
 343             crm_debug("On loss of quorum: "
 344                       "Stop resources that require quorum");
 345             break;
 346         case pcmk_no_quorum_demote:
 347             crm_debug("On loss of quorum: "
 348                       "Demote promotable resources and stop other resources");
 349             break;
 350         case pcmk_no_quorum_fence:
 351             crm_notice("On loss of quorum: Fence all remaining nodes");
 352             break;
 353         case pcmk_no_quorum_ignore:
 354             crm_notice("On loss of quorum: Ignore");
 355             break;
 356     }
 357 
 358     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
 359                     pcmk__sched_stop_removed_resources);
 360     if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
 361         crm_trace("Orphan resources are stopped");
 362     } else {
 363         crm_trace("Orphan resources are ignored");
 364     }
 365 
 366     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
 367                     pcmk__sched_cancel_removed_actions);
 368     if (pcmk_is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) {
 369         crm_trace("Orphan resource actions are stopped");
 370     } else {
 371         crm_trace("Orphan resource actions are ignored");
 372     }
 373 
 374     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 375                     pcmk__sched_in_maintenance);
 376     crm_trace("Maintenance mode: %s",
 377               pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
 378 
 379     set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
 380                     pcmk__sched_start_failure_fatal);
 381     if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) {
 382         crm_trace("Start failures are always fatal");
 383     } else {
 384         crm_trace("Start failures are handled by failcount");
 385     }
 386 
 387     if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
 388         set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
 389                         pcmk__sched_startup_fencing);
 390     }
 391     if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
 392         crm_trace("Unseen nodes will be fenced");
 393     } else {
 394         pcmk__warn_once(pcmk__wo_blind,
 395                         "Blind faith: not fencing unseen nodes");
 396     }
 397 
 398     pe__unpack_node_health_scores(scheduler);
 399 
 400     scheduler->priv->placement_strategy =
 401         pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
 402     crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy);
 403 
 404     set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
 405                     pcmk__sched_shutdown_lock);
 406     if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
 407         value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
 408         pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms));
 409         crm_trace("Resources will be locked to nodes that were cleanly "
 410                   "shut down (locks expire after %s)",
 411                   pcmk__readable_interval(scheduler->priv->shutdown_lock_ms));
 412     } else {
 413         crm_trace("Resources will not be locked to nodes that were cleanly "
 414                   "shut down");
 415     }
 416 
 417     value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
 418     pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms));
 419     if (scheduler->priv->node_pending_ms == 0U) {
 420         crm_trace("Do not fence pending nodes");
 421     } else {
 422         crm_trace("Fence pending nodes after %s",
 423                   pcmk__readable_interval(scheduler->priv->node_pending_ms));
 424     }
 425 
 426     set_config_flag(scheduler, PCMK_OPT_FENCE_REMOTE_WITHOUT_QUORUM,
 427                     pcmk__sched_fence_remote_no_quorum);
 428     if (pcmk_is_set(scheduler->flags, pcmk__sched_fence_remote_no_quorum)) {
 429         crm_trace("Pacemaker Remote nodes may be fenced without quorum");
 430     } else {
 431         crm_trace("Pacemaker Remote nodes require quorum to be fenced");
 432     }
 433 
 434     return TRUE;
 435 }
 436 
 437 /*!
 438  * \internal
 439  * \brief Create a new node object in scheduler data
 440  *
 441  * \param[in]     id         ID of new node
 442  * \param[in]     uname      Name of new node
 443  * \param[in]     type       Type of new node
 444  * \param[in]     score      Score of new node
 445  * \param[in,out] scheduler  Scheduler data
 446  *
 447  * \return Newly created node object
 448  * \note The returned object is part of the scheduler data and should not be
 449  *       freed separately.
 450  */
 451 pcmk_node_t *
 452 pe_create_node(const char *id, const char *uname, const char *type,
 453                int score, pcmk_scheduler_t *scheduler)
 454 {
 455     enum pcmk__node_variant variant = pcmk__node_variant_cluster;
 456     pcmk_node_t *new_node = NULL;
 457 
 458     if (pcmk_find_node(scheduler, uname) != NULL) {
 459         pcmk__config_warn("More than one node entry has name '%s'", uname);
 460     }
 461 
 462     if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
 463                      pcmk__str_null_matches|pcmk__str_casei)) {
 464         variant = pcmk__node_variant_cluster;
 465 
 466     } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
 467         variant = pcmk__node_variant_remote;
 468 
 469     } else {
 470         pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
 471                          pcmk__s(uname, "without name"), type);
 472         return NULL;
 473     }
 474 
 475     new_node = calloc(1, sizeof(pcmk_node_t));
 476     if (new_node == NULL) {
 477         pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
 478                         uname);
 479         return NULL;
 480     }
 481 
 482     new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment));
 483     new_node->details = calloc(1, sizeof(struct pcmk__node_details));
 484     new_node->priv = calloc(1, sizeof(pcmk__node_private_t));
 485     if ((new_node->assign == NULL) || (new_node->details == NULL)
 486         || (new_node->priv == NULL)) {
 487         free(new_node->assign);
 488         free(new_node->details);
 489         free(new_node->priv);
 490         free(new_node);
 491         pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
 492                         uname);
 493         return NULL;
 494     }
 495 
 496     crm_trace("Creating node for entry %s/%s", uname, id);
 497     new_node->assign->score = score;
 498     new_node->priv->id = id;
 499     new_node->priv->name = uname;
 500     new_node->priv->flags = pcmk__node_probes_allowed;
 501     new_node->details->online = FALSE;
 502     new_node->details->shutdown = FALSE;
 503     new_node->details->running_rsc = NULL;
 504     new_node->priv->scheduler = scheduler;
 505     new_node->priv->variant = variant;
 506     new_node->priv->attrs = pcmk__strkey_table(free, free);
 507     new_node->priv->utilization = pcmk__strkey_table(free, free);
 508     new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests);
 509 
 510     if (pcmk__is_pacemaker_remote_node(new_node)) {
 511         pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
 512         pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes);
 513     } else {
 514         pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
 515     }
 516 
 517     scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
 518                                             pe__cmp_node_name);
 519     return new_node;
 520 }
 521 
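      /*!
       * \internal
       * \brief Add an implicit remote connection primitive for a guest node
       *
       * Check a primitive's meta-attributes for PCMK_META_REMOTE_NODE. If it
       * is set and no resource with that name already exists, add an implicit
       * ocf:pacemaker:remote primitive for the guest node's connection to the
       * configuration.
       *
       * \param[in,out] xml_obj  Primitive XML to check
       * \param[in,out] parent   Where to add any implicit connection primitive
       * \param[in,out] data     Scheduler data
       *
       * \return Guest node name if a connection primitive was added, otherwise
       *         NULL
       */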
 522 static const char *
 523 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
 524 {
 525     xmlNode *attr_set = NULL;
 526     xmlNode *attr = NULL;
 527 
 528     const char *container_id = pcmk__xe_id(xml_obj);
 529     const char *remote_name = NULL;
 530     const char *remote_server = NULL;
 531     const char *remote_port = NULL;
 532     const char *connect_timeout = "60s";
  533     const char *remote_allow_migrate = NULL;
 534     const char *is_managed = NULL;
 535 
 536     // @TODO This doesn't handle rules or id-ref
 537     for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
 538                                          NULL, NULL);
 539          attr_set != NULL;
 540          attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {
 541 
 542         for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
 543              attr != NULL; attr = pcmk__xe_next(attr, NULL)) {
 544 
 545             const char *value = crm_element_value(attr, PCMK_XA_VALUE);
 546             const char *name = crm_element_value(attr, PCMK_XA_NAME);
 547 
 548             if (name == NULL) { // Sanity
 549                 continue;
 550             }
 551 
 552             if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
 553                 remote_name = value;
 554 
 555             } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
 556                 remote_server = value;
 557 
 558             } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
 559                 remote_port = value;
 560 
 561             } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
 562                 connect_timeout = value;
 563 
 564             } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
 565                 remote_allow_migrate = value;
 566 
 567             } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
 568                 is_managed = value;
 569             }
 570         }
 571     }
 572 
 573     if (remote_name == NULL) {
 574         return NULL;
 575     }
 576 
 577     if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
 578         return NULL;
 579     }
 580 
 581     pe_create_remote_xml(parent, remote_name, container_id,
 582                          remote_allow_migrate, is_managed,
 583                          connect_timeout, remote_server, remote_port);
 584     return remote_name;
 585 }
 586 
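      /*!
       * \internal
       * \brief Apply the startup-fencing setting to a newly created node
       *
       * \param[in,out] scheduler  Scheduler data
       * \param[in,out] new_node   Node to mark as clean or unclean
       */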
 587 static void
 588 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
 589 {
 590     if ((new_node->priv->variant == pcmk__node_variant_remote)
 591         && (new_node->priv->remote == NULL)) {
 592         /* Ignore fencing for remote nodes that don't have a connection resource
 593          * associated with them. This happens when remote node entries get left
 594          * in the nodes section after the connection resource is removed.
 595          */
 596         return;
 597     }
 598 
 599     if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) {
 600         // All nodes are unclean until we've seen their status entry
 601         new_node->details->unclean = TRUE;
 602 
 603     } else {
 604         // Blind faith ...
 605         new_node->details->unclean = FALSE;
 606     }
 607 }
 608 
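      /*!
       * \internal
       * \brief Unpack the nodes section of the CIB configuration
       *
       * \param[in,out] xml_nodes  Nodes configuration XML
       * \param[in,out] scheduler  Scheduler data
       *
       * \return TRUE on success, FALSE if a node entry could not be created
       */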
 609 gboolean
 610 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
 611 {
 612     xmlNode *xml_obj = NULL;
 613     pcmk_node_t *new_node = NULL;
 614     const char *id = NULL;
 615     const char *uname = NULL;
 616     const char *type = NULL;
 617 
 618     for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
 619          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {
 620 
 621         int score = 0;
 622         int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
 623 
 624         new_node = NULL;
 625 
 626         id = crm_element_value(xml_obj, PCMK_XA_ID);
 627         uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
 628         type = crm_element_value(xml_obj, PCMK_XA_TYPE);
 629         crm_trace("Processing node %s/%s", uname, id);
 630 
 631         if (id == NULL) {
 632             pcmk__config_err("Ignoring <" PCMK_XE_NODE
 633                              "> entry in configuration without id");
 634             continue;
 635         }
 636         if (rc != pcmk_rc_ok) {
 637             // Not possible with schema validation enabled
 638             pcmk__config_warn("Using 0 as score for node %s "
 639                               "because '%s' is not a valid score: %s",
 640                               pcmk__s(uname, "without name"),
 641                               crm_element_value(xml_obj, PCMK_XA_SCORE),
 642                               pcmk_rc_str(rc));
 643         }
 644         new_node = pe_create_node(id, uname, type, score, scheduler);
 645 
 646         if (new_node == NULL) {
 647             return FALSE;
 648         }
 649 
 650         handle_startup_fencing(scheduler, new_node);
 651 
 652         add_node_attrs(xml_obj, new_node, FALSE, scheduler);
 653 
 654         crm_trace("Done with node %s",
 655                   crm_element_value(xml_obj, PCMK_XA_UNAME));
 656     }
 657 
 658     return TRUE;
 659 }
 660 
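      /*!
       * \internal
       * \brief Link a resource to its launcher resource, if any
       *
       * If a resource's PCMK__META_CONTAINER meta-attribute names another
       * resource, record that resource as this one's launcher. Collective
       * resources are handled by recursing into their children.
       *
       * \param[in,out] rsc        Resource to check
       * \param[in,out] scheduler  Scheduler data
       */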
 661 static void
 662 unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
 663 {
 664     const char *launcher_id = NULL;
 665 
 666     if (rsc->priv->children != NULL) {
 667         g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
 668                        scheduler);
 669         return;
 670     }
 671 
 672     launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
 673     if ((launcher_id != NULL)
 674         && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
 675         pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources,
 676                                                      launcher_id);
 677 
 678         if (launcher != NULL) {
 679             rsc->priv->launcher = launcher;
 680             launcher->priv->launched =
 681                 g_list_append(launcher->priv->launched, rsc);
 682             pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
 683                             rsc->id, launcher_id);
 684         } else {
 685             pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
 686                              rsc->id, launcher_id);
 687         }
 688     }
 689 }
 690 
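      /*!
       * \internal
       * \brief Create node objects for Pacemaker Remote and guest nodes
       *
       * Scan the resource configuration for ocf:pacemaker:remote primitives
       * and for primitives with guest node meta-attributes, and create a node
       * object for each remote or guest node found, before resources are
       * unpacked.
       *
       * \param[in,out] xml_resources  Top of resource configuration XML
       * \param[in,out] scheduler      Scheduler data
       *
       * \return TRUE (this function does not currently fail)
       */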
 691 gboolean
 692 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 693 {
 694     xmlNode *xml_obj = NULL;
 695 
 696     /* Create remote nodes and guest nodes from the resource configuration
 697      * before unpacking resources.
 698      */
 699     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 700          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
 701 
 702         const char *new_node_id = NULL;
 703 
 704         /* Check for remote nodes, which are defined by ocf:pacemaker:remote
 705          * primitives.
 706          */
 707         if (xml_contains_remote_node(xml_obj)) {
 708             new_node_id = pcmk__xe_id(xml_obj);
 709             /* The pcmk_find_node() check ensures we don't iterate over an
 710              * expanded node that has already been added to the node list
 711              */
 712             if (new_node_id
 713                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 714                 crm_trace("Found remote node %s defined by resource %s",
 715                           new_node_id, pcmk__xe_id(xml_obj));
 716                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 717                                0, scheduler);
 718             }
 719             continue;
 720         }
 721 
 722         /* Check for guest nodes, which are defined by special meta-attributes
 723          * of a primitive of any type (for example, VirtualDomain or Xen).
 724          */
 725         if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
 726             /* This will add an ocf:pacemaker:remote primitive to the
 727              * configuration for the guest node's connection, to be unpacked
 728              * later.
 729              */
 730             new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
 731                                                  scheduler);
 732             if (new_node_id
 733                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 734                 crm_trace("Found guest node %s in resource %s",
 735                           new_node_id, pcmk__xe_id(xml_obj));
 736                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 737                                0, scheduler);
 738             }
 739             continue;
 740         }
 741 
 742         /* Check for guest nodes inside a group. Clones are currently not
 743          * supported as guest nodes.
 744          */
 745         if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
 746             xmlNode *xml_obj2 = NULL;
 747             for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
 748                  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {
 749 
 750                 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
 751                                                      scheduler);
 752 
 753                 if (new_node_id
 754                     && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 755                     crm_trace("Found guest node %s in resource %s inside group %s",
 756                               new_node_id, pcmk__xe_id(xml_obj2),
 757                               pcmk__xe_id(xml_obj));
 758                     pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 759                                    0, scheduler);
 760                 }
 761             }
 762         }
 763     }
 764     return TRUE;
 765 }
 766 
 767 /* Call this after all the nodes and resources have been
 768  * unpacked, but before the status section is read.
 769  *
 770  * A remote node's online status is reflected by the state
 771  * of the remote node's connection resource. We need to link
 772  * the remote node to this connection resource so we can have
 773  * easy access to the connection resource during the scheduler calculations.
 774  */
 775 static void
 776 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
 777 {
 778     pcmk_node_t *remote_node = NULL;
 779 
 780     if (!pcmk_is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) {
 781         return;
 782     }
 783 
 784     if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
 785         /* remote_nodes and remote_resources are not linked in quick location calculations */
 786         return;
 787     }
 788 
 789     remote_node = pcmk_find_node(scheduler, new_rsc->id);
 790     CRM_CHECK(remote_node != NULL, return);
 791 
 792     pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
 793                     new_rsc->id, pcmk__node_name(remote_node));
 794     remote_node->priv->remote = new_rsc;
 795 
 796     if (new_rsc->priv->launcher == NULL) {
 797         /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
 798          * the same as is done for cluster nodes.
 799          */
 800         handle_startup_fencing(scheduler, remote_node);
 801 
 802     } else {
 803         /* pe_create_node() marks the new node as "remote" or "cluster"; now
 804          * that we know the node is a guest node, update it correctly.
 805          */
 806         pcmk__insert_dup(remote_node->priv->attrs,
 807                          CRM_ATTR_KIND, "container");
 808     }
 809 }
 810 
 811 /*!
 812  * \internal
 813  * \brief Parse configuration XML for resource information
 814  *
 815  * \param[in]     xml_resources  Top of resource configuration XML
 816  * \param[in,out] scheduler      Scheduler data
 817  *
 818  * \return TRUE
 819  *
 820  * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
 821  *       be used when pe__unpack_resource() calls resource_location()
 822  */
 823 gboolean
 824 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 825 {
 826     xmlNode *xml_obj = NULL;
 827     GList *gIter = NULL;
 828 
 829     scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref);
 830 
 831     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 832          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
 833 
 834         pcmk_resource_t *new_rsc = NULL;
 835         const char *id = pcmk__xe_id(xml_obj);
 836 
 837         if (pcmk__str_empty(id)) {
 838             pcmk__config_err("Ignoring <%s> resource without ID",
 839                              xml_obj->name);
 840             continue;
 841         }
 842 
 843         if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
 844             if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
 845                                              NULL, NULL) == FALSE) {
  846                 /* Record the template's ID so we at least know that it exists. */
 847                 pcmk__insert_dup(scheduler->priv->templates, id, NULL);
 848             }
 849             continue;
 850         }
 851 
 852         crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
 853         if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
 854                                 scheduler) == pcmk_rc_ok) {
 855             scheduler->priv->resources =
 856                 g_list_append(scheduler->priv->resources, new_rsc);
 857             pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
 858 
 859         } else {
 860             pcmk__config_err("Ignoring <%s> resource '%s' "
 861                              "because configuration is invalid",
 862                              xml_obj->name, id);
 863         }
 864     }
 865 
 866     for (gIter = scheduler->priv->resources;
 867          gIter != NULL; gIter = gIter->next) {
 868 
 869         pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
 870 
 871         unpack_launcher(rsc, scheduler);
 872         link_rsc2remotenode(scheduler, rsc);
 873     }
 874 
 875     scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
 876                                              pe__cmp_rsc_priority);
 877     if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) {
 878         /* Ignore */
 879 
 880     } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
 881                && !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) {
 882 
 883         pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
 884         pcmk__config_err("Either configure some or disable STONITH with the "
 885                          PCMK_OPT_STONITH_ENABLED " option");
 886         pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
 887     }
 888 
 889     return TRUE;
 890 }
 891 
 892 /*!
 893  * \internal
 894  * \brief Validate the levels in a fencing topology
 895  *
 896  * \param[in] xml  \c PCMK_XE_FENCING_TOPOLOGY element
 897  */
 898 void
 899 pcmk__validate_fencing_topology(const xmlNode *xml)
 900 {
 901     if (xml == NULL) {
 902         return;
 903     }
 904 
 905     CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);
 906 
 907     for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
 908                                                      NULL, NULL);
 909          level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {
 910 
 911         const char *id = pcmk__xe_id(level);
 912         int index = 0;
 913 
 914         if (pcmk__str_empty(id)) {
 915             pcmk__config_err("Ignoring fencing level without ID");
 916             continue;
 917         }
 918 
 919         if (crm_element_value_int(level, PCMK_XA_INDEX, &index) != 0) {
 920             pcmk__config_err("Ignoring fencing level %s with invalid index",
 921                              id);
 922             continue;
 923         }
 924 
 925         if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
 926             pcmk__config_err("Ignoring fencing level %s with out-of-range "
 927                              "index %d",
 928                              id, index);
 929         }
 930     }
 931 }
 932 
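      /*!
       * \internal
       * \brief Unpack the tags section of the CIB configuration
       *
       * \param[in,out] xml_tags   Tags configuration XML
       * \param[in,out] scheduler  Scheduler data
       *
       * \return TRUE (this function does not currently fail)
       */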
 933 gboolean
 934 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
 935 {
 936     xmlNode *xml_tag = NULL;
 937 
 938     scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref);
 939 
 940     for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
 941          xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {
 942 
 943         xmlNode *xml_obj_ref = NULL;
 944         const char *tag_id = pcmk__xe_id(xml_tag);
 945 
 946         if (tag_id == NULL) {
 947             pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
 948                              (const char *) xml_tag->name);
 949             continue;
 950         }
 951 
 952         for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
 953                                                 NULL, NULL);
 954              xml_obj_ref != NULL;
 955              xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {
 956 
 957             const char *obj_ref = pcmk__xe_id(xml_obj_ref);
 958 
 959             if (obj_ref == NULL) {
 960                 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
 961                                  xml_obj_ref->name, tag_id);
 962                 continue;
 963             }
 964 
 965             pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
 966         }
 967     }
 968 
 969     return TRUE;
 970 }
 971 
 972 /*!
 973  * \internal
 974  * \brief Unpack a ticket state entry
 975  *
 976  * \param[in]     xml_ticket  XML ticket state to unpack
 977  * \param[in,out] userdata    Scheduler data
 978  *
 979  * \return pcmk_rc_ok (to always continue unpacking further entries)
 980  */
 981 static int
 982 unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
 983 {
 984     pcmk_scheduler_t *scheduler = userdata;
 985 
 986     const char *ticket_id = NULL;
 987     const char *granted = NULL;
 988     const char *last_granted = NULL;
 989     const char *standby = NULL;
 990     xmlAttrPtr xIter = NULL;
 991 
 992     pcmk__ticket_t *ticket = NULL;
 993 
 994     ticket_id = pcmk__xe_id(xml_ticket);
 995     if (pcmk__str_empty(ticket_id)) {
 996         return pcmk_rc_ok;
 997     }
 998 
 999     crm_trace("Processing ticket state for %s", ticket_id);
1000 
1001     ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
1002                                  ticket_id);
1003     if (ticket == NULL) {
1004         ticket = ticket_new(ticket_id, scheduler);
1005         if (ticket == NULL) {
1006             return pcmk_rc_ok;
1007         }
1008     }
1009 
1010     for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1011         const char *prop_name = (const char *)xIter->name;
1012         const char *prop_value = pcmk__xml_attr_value(xIter);
1013 
1014         if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1015             continue;
1016         }
1017         pcmk__insert_dup(ticket->state, prop_name, prop_value);
1018     }
1019 
1020     granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1021     if (granted && crm_is_true(granted)) {
1022         pcmk__set_ticket_flags(ticket, pcmk__ticket_granted);
1023         crm_info("We have ticket '%s'", ticket->id);
1024     } else {
1025         pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted);
1026         crm_info("We do not have ticket '%s'", ticket->id);
1027     }
1028 
1029     last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1030     if (last_granted) {
1031         long long last_granted_ll = 0LL;
1032         int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1033 
1034         if (rc != pcmk_rc_ok) {
1035             crm_warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1036                      " value '%s' in state for ticket %s: %s",
1037                      last_granted_ll, last_granted, ticket->id,
1038                      pcmk_rc_str(rc));
1039         }
1040         ticket->last_granted = (time_t) last_granted_ll;
1041     }
1042 
1043     standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1044     if (standby && crm_is_true(standby)) {
1045         pcmk__set_ticket_flags(ticket, pcmk__ticket_standby);
1046         if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) {
1047             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1048         }
1049     } else {
1050         pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby);
1051     }
1052 
1053     crm_trace("Done with ticket state for %s", ticket_id);
1054 
1055     return pcmk_rc_ok;
1056 }
1057 
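      /*!
       * \internal
       * \brief Unpack state and attributes for a Pacemaker Remote node
       *
       * \param[in,out] this_node  Pacemaker Remote node being unpacked
       * \param[in]     state      CIB node state XML for \p this_node
       * \param[in,out] scheduler  Scheduler data
       */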
1058 static void
1059 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1060                            pcmk_scheduler_t *scheduler)
1061 {
1062     const char *discovery = NULL;
1063     const xmlNode *attrs = NULL;
1064     pcmk_resource_t *rsc = NULL;
1065     int maint = 0;
1066 
1067     if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1068         return;
1069     }
1070 
1071     if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1072         return;
1073     }
1074     crm_trace("Processing Pacemaker Remote node %s",
1075               pcmk__node_name(this_node));
1076 
1077     pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
1078                        &maint, 0);
1079     if (maint) {
1080         pcmk__set_node_flags(this_node, pcmk__node_remote_maint);
1081     } else {
1082         pcmk__clear_node_flags(this_node, pcmk__node_remote_maint);
1083     }
1084 
1085     rsc = this_node->priv->remote;
1086     if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
1087         this_node->details->unclean = FALSE;
1088         pcmk__set_node_flags(this_node, pcmk__node_seen);
1089     }
1090     attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1091                                  NULL);
1092     add_node_attrs(attrs, this_node, TRUE, scheduler);
1093 
1094     if (pe__shutdown_requested(this_node)) {
1095         crm_info("%s is shutting down", pcmk__node_name(this_node));
1096         this_node->details->shutdown = TRUE;
1097     }
1098 
1099     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1100                                     pcmk__rsc_node_current))) {
1101         crm_info("%s is in standby mode", pcmk__node_name(this_node));
1102         pcmk__set_node_flags(this_node, pcmk__node_standby);
1103     }
1104 
1105     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1106                                     pcmk__rsc_node_current))
1107         || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) {
1108         crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1109         this_node->details->maintenance = TRUE;
1110     }
1111 
1112     discovery = pcmk__node_attr(this_node,
1113                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1114                                 NULL, pcmk__rsc_node_current);
1115     if ((discovery != NULL) && !crm_is_true(discovery)) {
1116         pcmk__warn_once(pcmk__wo_rdisc_enabled,
1117                         "Support for the "
1118                         PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1119                         " node attribute is deprecated and will be removed"
1120                         " (and behave as 'true') in a future release.");
1121 
1122         if (pcmk__is_remote_node(this_node)
1123             && !pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1124             pcmk__config_warn("Ignoring "
1125                               PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1126                               " attribute on Pacemaker Remote node %s"
1127                               " because fencing is disabled",
1128                               pcmk__node_name(this_node));
1129         } else {
1130             /* This is either a remote node with fencing enabled, or a guest
1131              * node. We don't care whether fencing is enabled when fencing guest
1132              * nodes, because they are "fenced" by recovering their containing
1133              * resource.
1134              */
1135             crm_info("%s has resource discovery disabled",
1136                      pcmk__node_name(this_node));
1137             pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed);
1138         }
1139     }
1140 }
1141 
1142 /*!
1143  * \internal
1144  * \brief Unpack a cluster node's transient attributes
1145  *
1146  * \param[in]     state      CIB node state XML
1147  * \param[in,out] node       Cluster node whose attributes are being unpacked
1148  * \param[in,out] scheduler  Scheduler data
1149  */
1150 static void
1151 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1152                             pcmk_scheduler_t *scheduler)
1153 {
1154     const char *discovery = NULL;
1155     const xmlNode *attrs = pcmk__xe_first_child(state,
1156                                                 PCMK__XE_TRANSIENT_ATTRIBUTES,
1157                                                 NULL, NULL);
1158 
1159     add_node_attrs(attrs, node, TRUE, scheduler);
1160 
1161     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1162                                     pcmk__rsc_node_current))) {
1163         crm_info("%s is in standby mode", pcmk__node_name(node));
1164         pcmk__set_node_flags(node, pcmk__node_standby);
1165     }
1166 
1167     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1168                                     pcmk__rsc_node_current))) {
1169         crm_info("%s is in maintenance mode", pcmk__node_name(node));
1170         node->details->maintenance = TRUE;
1171     }
1172 
1173     discovery = pcmk__node_attr(node,
1174                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1175                                 NULL, pcmk__rsc_node_current);
1176     if ((discovery != NULL) && !crm_is_true(discovery)) {
1177         pcmk__config_warn("Ignoring "
1178                           PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1179                           " attribute for %s because disabling resource"
1180                           " discovery is not allowed for cluster nodes",
1181                           pcmk__node_name(node));
1182     }
1183 }
1184 
1185 /*!
1186  * \internal
1187  * \brief Unpack a node state entry (first pass)
1188  *
1189  * Unpack one node state entry from status. This unpacks information from the
 1190  * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
1191  * the resource history inside it. Multiple passes through the status are needed
1192  * to fully unpack everything.
1193  *
1194  * \param[in]     state      CIB node state XML
1195  * \param[in,out] scheduler  Scheduler data
1196  */
1197 static void
1198 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1199 {
1200     const char *id = NULL;
1201     const char *uname = NULL;
1202     pcmk_node_t *this_node = NULL;
1203 
1204     id = crm_element_value(state, PCMK_XA_ID);
1205     if (id == NULL) {
1206         pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1207                          PCMK_XA_ID);
1208         crm_log_xml_info(state, "missing-id");
1209         return;
1210     }
1211 
1212     uname = crm_element_value(state, PCMK_XA_UNAME);
1213     if (uname == NULL) {
1214         /* If a joining peer makes the cluster acquire the quorum from Corosync
1215          * but has not joined the controller CPG membership yet, it's possible
1216          * that the created PCMK__XE_NODE_STATE entry doesn't have a
1217          * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
1218          * join CPG.
1219          */
1220         crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1221                   "without " PCMK_XA_UNAME,
1222                   id);
1223     }
1224 
1225     this_node = pe_find_node_any(scheduler->nodes, id, uname);
1226     if (this_node == NULL) {
1227         crm_notice("Ignoring recorded state for removed node with name %s and "
1228                    PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1229         return;
1230     }
1231 
1232     if (pcmk__is_pacemaker_remote_node(this_node)) {
1233         int remote_fenced = 0;
1234 
1235         /* We can't determine the online status of Pacemaker Remote nodes until
1236          * after all resource history has been unpacked. In this first pass, we
1237          * do need to mark whether the node has been fenced, as this plays a
1238          * role during unpacking cluster node resource state.
1239          */
1240         pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
1241                            &remote_fenced, 0);
1242         if (remote_fenced) {
1243             pcmk__set_node_flags(this_node, pcmk__node_remote_fenced);
1244         } else {
1245             pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced);
1246         }
1247         return;
1248     }
1249 
1250     unpack_transient_attributes(state, this_node, scheduler);
1251 
1252     /* Provisionally mark this cluster node as clean. We have at least seen it
1253      * in the current cluster's lifetime.
1254      */
1255     this_node->details->unclean = FALSE;
1256     pcmk__set_node_flags(this_node, pcmk__node_seen);
1257 
1258     crm_trace("Determining online status of cluster node %s (id %s)",
1259               pcmk__node_name(this_node), id);
1260     determine_online_status(state, this_node, scheduler);
1261 
1262     if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)
1263         && this_node->details->online
1264         && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1265         /* Everything else should flow from this automatically
1266          * (at least until the scheduler becomes able to migrate off
1267          * healthy resources)
1268          */
1269         pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1270                       FALSE);
1271     }
1272 }
1273 
1274 /*!
1275  * \internal
1276  * \brief Unpack nodes' resource history as much as possible
1277  *
1278  * Unpack as many nodes' resource history as possible in one pass through the
1279  * status. We need to process Pacemaker Remote nodes' connections/containers
1280  * before unpacking their history; the connection/container history will be
1281  * in another node's history, so it might take multiple passes to unpack
1282  * everything.
1283  *
1284  * \param[in]     status     CIB XML status section
1285  * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
1286  * \param[in,out] scheduler  Scheduler data
1287  *
1288  * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1289  *         or EAGAIN if more unpacking remains to be done)
1290  */
1291 static int
1292 unpack_node_history(const xmlNode *status, bool fence,
1293                     pcmk_scheduler_t *scheduler)
1294 {
1295     int rc = pcmk_rc_ok;
1296 
1297     // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1298     for (const xmlNode *state = pcmk__xe_first_child(status,
1299                                                      PCMK__XE_NODE_STATE, NULL,
1300                                                      NULL);
1301          state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {
1302 
1303         const char *id = pcmk__xe_id(state);
1304         const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1305         pcmk_node_t *this_node = NULL;
1306 
1307         if ((id == NULL) || (uname == NULL)) {
1308             // Warning already logged in first pass through status section
1309             crm_trace("Not unpacking resource history from malformed "
1310                       PCMK__XE_NODE_STATE " without id and/or uname");
1311             continue;
1312         }
1313 
1314         this_node = pe_find_node_any(scheduler->nodes, id, uname);
1315         if (this_node == NULL) {
1316             // Warning already logged in first pass through status section
1317             crm_trace("Not unpacking resource history for node %s because "
1318                       "no longer in configuration", id);
1319             continue;
1320         }
1321 
1322         if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1323             crm_trace("Not unpacking resource history for node %s because "
1324                       "already unpacked", id);
1325             continue;
1326         }
1327 
1328         if (fence) {
1329             // We're processing all remaining nodes
1330 
1331         } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1332             /* We can unpack a guest node's history only after we've unpacked
1333              * other resource history to the point that we know that the node's
1334              * connection and containing resource are both up.
1335              */
1336             const pcmk_resource_t *remote = this_node->priv->remote;
1337             const pcmk_resource_t *launcher = remote->priv->launcher;
1338 
1339             if ((remote->priv->orig_role != pcmk_role_started)
1340                 || (launcher->priv->orig_role != pcmk_role_started)) {
1341                 crm_trace("Not unpacking resource history for guest node %s "
1342                           "because launcher and connection are not known to "
1343                           "be up", id);
1344                 continue;
1345             }
1346 
1347         } else if (pcmk__is_remote_node(this_node)) {
1348             /* We can unpack a remote node's history only after we've unpacked
1349              * other resource history to the point that we know that the node's
1350              * connection is up, with the exception of when shutdown locks are
1351              * in use.
1352              */
1353             pcmk_resource_t *rsc = this_node->priv->remote;
1354 
1355             if ((rsc == NULL)
1356                 || (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1357                     && (rsc->priv->orig_role != pcmk_role_started))) {
1358                 crm_trace("Not unpacking resource history for remote node %s "
1359                           "because connection is not known to be up", id);
1360                 continue;
1361             }
1362 
1363         /* If fencing and shutdown locks are disabled and we're not processing
1364          * unseen nodes, then we don't want to unpack offline nodes until online
1365          * nodes have been unpacked. This allows us to number active clone
1366          * instances first.
1367          */
1368         } else if (!pcmk_any_flags_set(scheduler->flags,
1369                                        pcmk__sched_fencing_enabled
1370                                        |pcmk__sched_shutdown_lock)
1371                    && !this_node->details->online) {
1372             crm_trace("Not unpacking resource history for offline "
1373                       "cluster node %s", id);
1374             continue;
1375         }
1376 
1377         if (pcmk__is_pacemaker_remote_node(this_node)) {
1378             determine_remote_online_status(scheduler, this_node);
1379             unpack_handle_remote_attrs(this_node, state, scheduler);
1380         }
1381 
1382         crm_trace("Unpacking resource history for %snode %s",
1383                   (fence? "unseen " : ""), id);
1384 
1385         pcmk__set_node_flags(this_node, pcmk__node_unpacked);
1386         unpack_node_lrm(this_node, state, scheduler);
1387 
1388         rc = EAGAIN; // Other node histories might depend on this one
1389     }
1390     return rc;
1391 }
1392 
1393 /* remove nodes that are down or stopping */
1394 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1395 /* anything else? */
1396 gboolean
1397 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1398 {
1399     xmlNode *state = NULL;
1400 
1401     crm_trace("Beginning unpack");
1402 
1403     if (scheduler->priv->ticket_constraints == NULL) {
1404         scheduler->priv->ticket_constraints =
1405             pcmk__strkey_table(free, destroy_ticket);
1406     }
1407 
1408     for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1409          state = pcmk__xe_next(state, NULL)) {
1410 
1411         if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1412             pcmk__xe_foreach_child(state, PCMK__XE_TICKET_STATE,
1413                                    unpack_ticket_state, scheduler);
1414 
1415         } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1416             unpack_node_state(state, scheduler);
1417         }
1418     }
1419 
1420     while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1421         crm_trace("Another pass through node resource histories is needed");
1422     }
1423 
1424     // Now catch any nodes we didn't see
1425     unpack_node_history(status,
1426                         pcmk_is_set(scheduler->flags,
1427                                     pcmk__sched_fencing_enabled),
1428                         scheduler);
1429 
1430     /* Now that we know where resources are, we can schedule stops of containers
1431      * with failed bundle connections
1432      */
1433     if (scheduler->priv->stop_needed != NULL) {
1434         for (GList *item = scheduler->priv->stop_needed;
1435              item != NULL; item = item->next) {
1436 
1437             pcmk_resource_t *container = item->data;
1438             pcmk_node_t *node = pcmk__current_node(container);
1439 
1440             if (node) {
1441                 stop_action(container, node, FALSE);
1442             }
1443         }
1444         g_list_free(scheduler->priv->stop_needed);
1445         scheduler->priv->stop_needed = NULL;
1446     }
1447 
1448     /* Now that we know status of all Pacemaker Remote connections and nodes,
1449      * we can stop connections for node shutdowns, and check the online status
1450      * of remote/guest nodes that didn't have any node history to unpack.
1451      */
1452     for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1453         pcmk_node_t *this_node = gIter->data;
1454 
1455         if (!pcmk__is_pacemaker_remote_node(this_node)) {
1456             continue;
1457         }
1458         if (this_node->details->shutdown
1459             && (this_node->priv->remote != NULL)) {
1460             pe__set_next_role(this_node->priv->remote, pcmk_role_stopped,
1461                               "remote shutdown");
1462         }
1463         if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1464             determine_remote_online_status(scheduler, this_node);
1465         }
1466     }
1467 
1468     return TRUE;
1469 }
1470 
1471 /*!
1472  * \internal
1473  * \brief Unpack node's time when it became a member at the cluster layer
1474  *
1475  * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
1476  * \param[in,out] scheduler   Scheduler data
1477  *
1478  * \return Epoch time when node became a cluster member
1479  *         (or scheduler effective time for legacy entries) if a member,
1480  *         0 if not a member, or -1 if no valid information available
1481  */
1482 static long long
1483 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1484 {
1485     const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1486     int member = 0;
1487 
1488     if (member_time == NULL) {
1489         return -1LL;
1490 
1491     } else if (crm_str_to_boolean(member_time, &member) == 1) {
1492         /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1493          * recorded as a boolean for a DC < 2.1.7, or the node is pending
1494          * shutdown and has left the CPG, in which case it was set to 1 to avoid
1495          * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1496          *
1497          * We return the effective time for in_ccm=1 because what's important to
1498          * avoid fencing is that effective time minus this value is less than
1499          * the pending node timeout.
1500          */
1501         return member? (long long) pcmk__scheduler_epoch_time(scheduler) : 0LL;
1502 
1503     } else {
1504         long long when_member = 0LL;
1505 
1506         if ((pcmk__scan_ll(member_time, &when_member,
1507                            0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1508             crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1509                      " in " PCMK__XE_NODE_STATE " entry", member_time);
1510             return -1LL;
1511         }
1512         return when_member;
1513     }
1514 }
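
/* Illustrative sketch (hypothetical values): how unpack_node_member() would
 * interpret a few PCMK__XA_IN_CCM values, assuming the scheduler's effective
 * time is 1700000000:
 *
 *     in_ccm="0"          -> 0           (not a cluster member)
 *     in_ccm="true"       -> 1700000000  (legacy boolean; effective time used)
 *     in_ccm="1698000000" -> 1698000000  (membership timestamp, DC >= 2.1.7)
 *     in_ccm="bogus"      -> -1          (unrecognized; warning logged)
 */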
1515 
1516 /*!
1517  * \internal
1518  * \brief Unpack node's time when it became online in process group
1519  *
1520  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry
1521  *
1522  * \return Epoch time when node became online in process group (or 0 if not
1523  *         online, or 1 for legacy online entries)
1524  */
1525 static long long
1526 unpack_node_online(const xmlNode *node_state)
1527 {
1528     const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1529 
1530     // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1531     if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1532                      pcmk__str_casei|pcmk__str_null_matches)) {
1533         return 0LL;
1534 
1535     } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1536         return 1LL;
1537 
1538     } else {
1539         long long when_online = 0LL;
1540 
1541         if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1542             || (when_online < 0)) {
1543             crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1544                      PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1545             return 0LL;
1546         }
1547         return when_online;
1548     }
1549 }
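
/* Illustrative sketch (hypothetical values): results unpack_node_online()
 * would produce for a few PCMK_XA_CRMD values:
 *
 *     crmd absent or "offline" -> 0           (offline)
 *     crmd="online"            -> 1           (legacy entry from a DC < 2.1.7)
 *     crmd="1698000123"        -> 1698000123  (time the peer joined the CPG)
 *     crmd="-5"                -> 0           (invalid; warning, assumed offline)
 */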
1550 
1551 /*!
1552  * \internal
1553  * \brief Unpack node attribute for user-requested fencing
1554  *
1555  * \param[in] node        Node to check
1556  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry in CIB status
1557  *
1558  * \return \c true if fencing has been requested for \p node, otherwise \c false
1559  */
1560 static bool
1561 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1562 {
1563     long long value = 0LL;
1564     int value_i = 0;
1565     int rc = pcmk_rc_ok;
1566     const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1567                                           NULL, pcmk__rsc_node_current);
1568 
1569     // Value may be boolean or an epoch time
1570     if (crm_str_to_boolean(value_s, &value_i) == 1) {
1571         return (value_i != 0);
1572     }
1573     rc = pcmk__scan_ll(value_s, &value, 0LL);
1574     if (rc == pcmk_rc_ok) {
1575         return (value > 0);
1576     }
1577     crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1578              " node attribute for %s: %s",
1579              value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1580     return false;
1581 }
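
/* Illustrative sketch (hypothetical values): the terminate node attribute may
 * be either a boolean or an epoch time, so unpack_node_terminate() would
 * report fencing as requested (true) or not (false) roughly as follows:
 *
 *     terminate="true"       -> true   (boolean request)
 *     terminate="0"          -> false
 *     terminate="1698000123" -> true   (any positive epoch time)
 *     terminate="junk"       -> false  (unrecognized; warning logged)
 */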
1582 
1583 static gboolean
1584 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1585                                    const xmlNode *node_state,
1586                                    pcmk_node_t *this_node)
1587 {
1588     gboolean online = FALSE;
1589     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1590     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1591     long long when_member = unpack_node_member(node_state, scheduler);
1592     long long when_online = unpack_node_online(node_state);
1593 
1594     if (when_member <= 0) {
1595         crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1596                   ((when_member < 0)? "presumed " : ""));
1597 
1598     } else if (when_online > 0) {
1599         if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1600             online = TRUE;
1601         } else {
1602             crm_debug("Node %s is not ready to run resources: %s",
1603                       pcmk__node_name(this_node), join);
1604         }
1605 
1606     } else if (!pcmk_is_set(this_node->priv->flags,
1607                             pcmk__node_expected_up)) {
1608         crm_trace("Node %s controller is down: "
1609                   "member@%lld online@%lld join=%s expected=%s",
1610                   pcmk__node_name(this_node), when_member, when_online,
1611                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1612 
1613     } else {
1614         /* mark it unclean */
1615         pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1616         crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1617                  pcmk__node_name(this_node), when_member, when_online,
1618                  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1619     }
1620     return online;
1621 }
1622 
1623 /*!
1624  * \internal
1625  * \brief Check whether a node has taken too long to join controller group
1626  *
1627  * \param[in,out] scheduler    Scheduler data
1628  * \param[in]     node         Node to check
1629  * \param[in]     when_member  Epoch time when node became a cluster member
1630  * \param[in]     when_online  Epoch time when node joined controller group
1631  *
1632  * \return true if node has been pending (on the way up) longer than
1633  *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1634  * \note This will also update the cluster's recheck time if appropriate.
1635  */
1636 static inline bool
1637 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1638                  long long when_member, long long when_online)
1639 {
1640     if ((scheduler->priv->node_pending_ms > 0U)
1641         && (when_member > 0) && (when_online <= 0)) {
1642         // There is a timeout on pending nodes, and node is pending
1643 
1644         time_t timeout = when_member
1645                          + pcmk__timeout_ms2s(scheduler->priv->node_pending_ms);
1646 
1647         if (pcmk__scheduler_epoch_time(node->priv->scheduler) >= timeout) {
1648             return true; // Node has timed out
1649         }
1650 
1651         // Node is pending, but still has time
1652         pcmk__update_recheck_time(timeout, scheduler, "pending node timeout");
1653     }
1654     return false;
1655 }
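
/* Worked example (hypothetical numbers): with PCMK_OPT_NODE_PENDING_TIMEOUT
 * set to 120 seconds (node_pending_ms == 120000), a node that became a member
 * at when_member == 1700000000 but has not yet joined the controller group
 * (when_online == 0) times out at 1700000000 + 120 == 1700000120. If the
 * scheduler's effective time is 1700000200, pending_too_long() returns true
 * and the caller may fence the node; at 1700000100 it returns false and the
 * cluster recheck time is moved up to the timeout instead.
 */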
1656 
1657 static bool
1658 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1659                                 const xmlNode *node_state,
1660                                 pcmk_node_t *this_node)
1661 {
1662     bool termination_requested = unpack_node_terminate(this_node, node_state);
1663     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1664     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1665     long long when_member = unpack_node_member(node_state, scheduler);
1666     long long when_online = unpack_node_online(node_state);
1667 
1668 /*
1669   - PCMK__XA_JOIN          ::= member|down|pending|banned
1670   - PCMK_XA_EXPECTED       ::= member|down
1671 
1672   @COMPAT with entries recorded for DCs < 2.1.7
1673   - PCMK__XA_IN_CCM        ::= true|false
1674   - PCMK_XA_CRMD           ::= online|offline
1675 
1676   Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1677   - PCMK__XA_IN_CCM        ::= <timestamp>|0
1678   Since when the node has been a cluster member. A value of 0 means the node
1679   is not a cluster member.
1680 
1681   - PCMK_XA_CRMD           ::= <timestamp>|0
1682   Since when the peer has been online in CPG. A value of 0 means the peer is
1683   offline in CPG.
1684 */
1685 
1686     crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1687               pcmk__node_name(this_node), when_member, when_online,
1688               pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1689               (termination_requested? " (termination requested)" : ""));
1690 
1691     if (this_node->details->shutdown) {
1692         crm_debug("%s is shutting down", pcmk__node_name(this_node));
1693 
1694         /* Slightly different criteria since we can't shut down a dead peer */
1695         return (when_online > 0);
1696     }
1697 
1698     if (when_member < 0) {
1699         pe_fence_node(scheduler, this_node,
1700                       "peer has not been seen by the cluster", FALSE);
1701         return false;
1702     }
1703 
1704     if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1705         pe_fence_node(scheduler, this_node,
1706                       "peer failed Pacemaker membership criteria", FALSE);
1707 
1708     } else if (termination_requested) {
1709         if ((when_member <= 0) && (when_online <= 0)
1710             && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1711             crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1712             return false;
1713         }
1714         pe_fence_node(scheduler, this_node, "fencing was requested", false);
1715 
1716     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1717                             pcmk__str_null_matches)) {
1718 
1719         if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1720             pe_fence_node(scheduler, this_node,
1721                           "peer pending timed out on joining the process group",
1722                           FALSE);
1723 
1724         } else if ((when_member > 0) || (when_online > 0)) {
1725             crm_info("- %s is not ready to run resources",
1726                      pcmk__node_name(this_node));
1727             pcmk__set_node_flags(this_node, pcmk__node_standby);
1728             this_node->details->pending = TRUE;
1729 
1730         } else {
1731             crm_trace("%s is down or still coming up",
1732                       pcmk__node_name(this_node));
1733         }
1734 
1735     } else if (when_member <= 0) {
1736         // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1737         pe_fence_node(scheduler, this_node,
1738                       "peer is no longer part of the cluster", TRUE);
1739 
1740     } else if (when_online <= 0) {
1741         pe_fence_node(scheduler, this_node,
1742                       "peer process is no longer available", FALSE);
1743 
1744         /* Everything is running at this point, now check join state */
1745 
1746     } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1747         crm_info("%s is active", pcmk__node_name(this_node));
1748 
1749     } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1750                                 CRMD_JOINSTATE_DOWN, NULL)) {
1751         crm_info("%s is not ready to run resources",
1752                  pcmk__node_name(this_node));
1753         pcmk__set_node_flags(this_node, pcmk__node_standby);
1754         this_node->details->pending = TRUE;
1755 
1756     } else {
1757         pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1758                       FALSE);
1759     }
1760 
1761     return (when_member > 0);
1762 }
1763 
1764 static void
1765 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1766                                pcmk_node_t *this_node)
1767 {
1768     pcmk_resource_t *rsc = this_node->priv->remote;
1769     pcmk_resource_t *launcher = NULL;
1770     pcmk_node_t *host = NULL;
1771     const char *node_type = "Remote";
1772 
1773     if (rsc == NULL) {
1774         /* This is a leftover node state entry for a former Pacemaker Remote
1775          * node whose connection resource was removed. Consider it offline.
1776          */
1777         crm_trace("Pacemaker Remote node %s is considered OFFLINE because "
1778                   "its connection resource has been removed from the CIB",
1779                   this_node->priv->id);
1780         this_node->details->online = FALSE;
1781         return;
1782     }
1783 
1784     launcher = rsc->priv->launcher;
1785     if (launcher != NULL) {
1786         node_type = "Guest";
1787         if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1788             host = rsc->priv->active_nodes->data;
1789         }
1790     }
1791 
1792     /* If the resource is currently started, mark it online. */
1793     if (rsc->priv->orig_role == pcmk_role_started) {
1794         this_node->details->online = TRUE;
1795     }
1796 
1797     /* consider this node shutting down if transitioning start->stop */
1798     if ((rsc->priv->orig_role == pcmk_role_started)
1799         && (rsc->priv->next_role == pcmk_role_stopped)) {
1800 
1801         crm_trace("%s node %s shutting down because connection resource is stopping",
1802                   node_type, this_node->priv->id);
1803         this_node->details->shutdown = TRUE;
1804     }
1805 
1806     /* Now check all the failure conditions. */
1807     if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) {
1808         crm_trace("Guest node %s UNCLEAN because guest resource failed",
1809                   this_node->priv->id);
1810         this_node->details->online = FALSE;
1811         pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1812 
1813     } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
1814         crm_trace("%s node %s OFFLINE because connection resource failed",
1815                   node_type, this_node->priv->id);
1816         this_node->details->online = FALSE;
1817 
1818     } else if ((rsc->priv->orig_role == pcmk_role_stopped)
1819                || ((launcher != NULL)
1820                    && (launcher->priv->orig_role == pcmk_role_stopped))) {
1821 
1822         crm_trace("%s node %s OFFLINE because its resource is stopped",
1823                   node_type, this_node->priv->id);
1824         this_node->details->online = FALSE;
1825         pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);
1826 
1827     } else if (host && (host->details->online == FALSE)
1828                && host->details->unclean) {
1829         crm_trace("Guest node %s UNCLEAN because host is unclean",
1830                   this_node->priv->id);
1831         this_node->details->online = FALSE;
1832         pcmk__set_node_flags(this_node, pcmk__node_remote_reset);
1833 
1834     } else {
1835         crm_trace("%s node %s is %s",
1836                   node_type, this_node->priv->id,
1837                   this_node->details->online? "ONLINE" : "OFFLINE");
1838     }
1839 }
1840 
1841 static void
1842 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1843                         pcmk_scheduler_t *scheduler)
1844 {
1845     gboolean online = FALSE;
1846     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1847 
1848     CRM_CHECK(this_node != NULL, return);
1849 
1850     this_node->details->shutdown = FALSE;
1851 
1852     if (pe__shutdown_requested(this_node)) {
1853         this_node->details->shutdown = TRUE;
1854 
1855     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1856         pcmk__set_node_flags(this_node, pcmk__node_expected_up);
1857     }
1858 
1859     if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
1860         online = determine_online_status_no_fencing(scheduler, node_state,
1861                                                     this_node);
1862 
1863     } else {
1864         online = determine_online_status_fencing(scheduler, node_state,
1865                                                  this_node);
1866     }
1867 
1868     if (online) {
1869         this_node->details->online = TRUE;
1870 
1871     } else {
1872         /* remove node from contention */
1873         this_node->assign->score = -PCMK_SCORE_INFINITY;
1874     }
1875 
1876     if (online && this_node->details->shutdown) {
1877         /* don't run resources here */
1878         this_node->assign->score = -PCMK_SCORE_INFINITY;
1879     }
1880 
1881     if (this_node->details->unclean) {
1882         pcmk__sched_warn(scheduler, "%s is unclean",
1883                          pcmk__node_name(this_node));
1884 
1885     } else if (!this_node->details->online) {
1886         crm_trace("%s is offline", pcmk__node_name(this_node));
1887 
1888     } else if (this_node->details->shutdown) {
1889         crm_info("%s is shutting down", pcmk__node_name(this_node));
1890 
1891     } else if (this_node->details->pending) {
1892         crm_info("%s is pending", pcmk__node_name(this_node));
1893 
1894     } else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) {
1895         crm_info("%s is in standby", pcmk__node_name(this_node));
1896 
1897     } else if (this_node->details->maintenance) {
1898         crm_info("%s is in maintenance", pcmk__node_name(this_node));
1899 
1900     } else {
1901         crm_info("%s is online", pcmk__node_name(this_node));
1902     }
1903 }
1904 
1905 /*!
1906  * \internal
1907  * \brief Find the end of a resource's name, excluding any clone suffix
1908  *
1909  * \param[in] id  Resource ID to check
1910  *
1911  * \return Pointer to last character of resource's base name
1912  */
1913 const char *
1914 pe_base_name_end(const char *id)
1915 {
1916     if (!pcmk__str_empty(id)) {
1917         const char *end = id + strlen(id) - 1;
1918 
1919         for (const char *s = end; s > id; --s) {
1920             switch (*s) {
1921                 case '0':
1922                 case '1':
1923                 case '2':
1924                 case '3':
1925                 case '4':
1926                 case '5':
1927                 case '6':
1928                 case '7':
1929                 case '8':
1930                 case '9':
1931                     break;
1932                 case ':':
1933                     return (s == end)? s : (s - 1);
1934                 default:
1935                     return end;
1936             }
1937         }
1938         return end;
1939     }
1940     return NULL;
1941 }
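
/* Illustrative sketch (hypothetical IDs): pe_base_name_end() returns a pointer
 * into the given string, just before any numeric ":<instance>" clone suffix:
 *
 *     pe_base_name_end("galera:2")  // points at the final 'a' of "galera"
 *     pe_base_name_end("dummy1")    // points at the trailing '1' (no suffix)
 *     pe_base_name_end("grp:0:1")   // points at the '0' (only the last
 *                                   //  numeric suffix is treated as a suffix)
 */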
1942 
1943 /*!
1944  * \internal
1945  * \brief Get a resource name excluding any clone suffix
1946  *
1947  * \param[in] last_rsc_id  Resource ID to check
1948  *
1949  * \return Pointer to newly allocated string with resource's base name
1950  * \note It is the caller's responsibility to free() the result.
1951  *       This asserts on error, so callers can assume result is not NULL.
1952  */
1953 char *
1954 clone_strip(const char *last_rsc_id)
1955 {
1956     const char *end = pe_base_name_end(last_rsc_id);
1957     char *basename = NULL;
1958 
1959     pcmk__assert(end != NULL);
1960     basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1961     pcmk__assert(basename != NULL);
1962     return basename;
1963 }
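
/* Usage sketch (hypothetical ID): clone_strip() copies the base name found by
 * pe_base_name_end() into newly allocated memory, which the caller must free:
 *
 *     char *base = clone_strip("galera:2");   // yields "galera"
 *     free(base);
 */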
1964 
1965 /*!
1966  * \internal
1967  * \brief Get the name of the first instance of a cloned resource
1968  *
1969  * \param[in] last_rsc_id  Resource ID to check
1970  *
1971  * \return Pointer to newly allocated string with resource's base name plus :0
1972  * \note It is the caller's responsibility to free() the result.
1973  *       This asserts on error, so callers can assume result is not NULL.
1974  */
1975 char *
1976 clone_zero(const char *last_rsc_id)
1977 {
1978     const char *end = pe_base_name_end(last_rsc_id);
1979     size_t base_name_len = end - last_rsc_id + 1;
1980     char *zero = NULL;
1981 
1982     pcmk__assert(end != NULL);
1983     zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
1984     memcpy(zero, last_rsc_id, base_name_len);
1985     zero[base_name_len] = ':';
1986     zero[base_name_len + 1] = '0';
1987     return zero;
1988 }
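
/* Usage sketch (hypothetical IDs): clone_zero() always appends ":0" to the
 * base name, whether or not the given ID already had a clone suffix:
 *
 *     char *zero = clone_zero("galera:2");    // yields "galera:0"
 *     free(zero);
 *     zero = clone_zero("dummy");             // yields "dummy:0"
 *     free(zero);
 */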
1989 
1990 static pcmk_resource_t *
1991 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1992                      pcmk_scheduler_t *scheduler)
1993 {
1994     pcmk_resource_t *rsc = NULL;
1995     xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1996 
1997     pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1998     crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
1999     crm_log_xml_debug(xml_rsc, "Orphan resource");
2000 
2001     if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2002         return NULL;
2003     }
2004 
2005     if (xml_contains_remote_node(xml_rsc)) {
2006         pcmk_node_t *node;
2007 
2008         crm_debug("Detected orphaned remote node %s", rsc_id);
2009         node = pcmk_find_node(scheduler, rsc_id);
2010         if (node == NULL) {
2011             node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2012                                   scheduler);
2013         }
2014         link_rsc2remotenode(scheduler, rsc);
2015 
2016         if (node) {
2017             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2018             node->details->shutdown = TRUE;
2019         }
2020     }
2021 
2022     if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2023         // This removed resource needs to be mapped to a launcher
2024         crm_trace("Launched resource %s was removed from the configuration",
2025                   rsc_id);
2026         pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched);
2027     }
2028     pcmk__set_rsc_flags(rsc, pcmk__rsc_removed);
2029     scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
2030     return rsc;
2031 }
2032 
2033 /*!
2034  * \internal
2035  * \brief Create orphan instance for anonymous clone resource history
2036  *
2037  * \param[in,out] parent     Clone resource that orphan will be added to
2038  * \param[in]     rsc_id     Orphan's resource ID
2039  * \param[in]     node       Where orphan is active (for logging only)
2040  * \param[in,out] scheduler  Scheduler data
2041  *
2042  * \return Newly added orphaned instance of \p parent
2043  */
2044 static pcmk_resource_t *
2045 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2046                         const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2047 {
2048     pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2049     pcmk_resource_t *orphan = NULL;
2050 
2051     // find_rsc() because we might be a cloned group
2052     orphan = top->priv->fns->find_rsc(top, rsc_id, NULL,
2053                                       pcmk_rsc_match_clone_only);
2054 
2055     pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2056                     top->id, parent->id, rsc_id, pcmk__node_name(node));
2057     return orphan;
2058 }
2059 
2060 /*!
2061  * \internal
2062  * \brief Check a node for an instance of an anonymous clone
2063  *
2064  * Return a child instance of the specified anonymous clone, in order of
2065  * preference: (1) the instance running on the specified node, if any;
2066  * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
2067  * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX
2068  * instances are already active).
2069  *
2070  * \param[in,out] scheduler  Scheduler data
2071  * \param[in]     node       Node on which to check for instance
2072  * \param[in,out] parent     Clone to check
2073  * \param[in]     rsc_id     Name of cloned resource in history (no instance)
2074  */
2075 static pcmk_resource_t *
2076 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2077                      pcmk_resource_t *parent, const char *rsc_id)
2078 {
2079     GList *rIter = NULL;
2080     pcmk_resource_t *rsc = NULL;
2081     pcmk_resource_t *inactive_instance = NULL;
2082     gboolean skip_inactive = FALSE;
2083 
2084     pcmk__assert(pcmk__is_anonymous_clone(parent));
2085 
2086     // Check for active (or partially active, for cloned groups) instance
2087     pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2088                     rsc_id, pcmk__node_name(node), parent->id);
2089 
2090     for (rIter = parent->priv->children;
2091          (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
2092 
2093         GList *locations = NULL;
2094         pcmk_resource_t *child = rIter->data;
2095 
2096         /* Check whether this instance is already known to be active or pending
2097          * anywhere, at this stage of unpacking. Because this function is called
2098          * for a resource before the resource's individual operation history
2099          * entries are unpacked, locations will generally not contain the
2100          * desired node.
2101          *
2102          * However, there are three exceptions:
2103          * (1) when child is a cloned group and we have already unpacked the
2104          *     history of another member of the group on the same node;
2105          * (2) when we've already unpacked the history of another numbered
2106          *     instance on the same node (which can happen if
2107          *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2108          * (3) when we re-run calculations on the same scheduler data as part of
2109          *     a simulation.
2110          */
2111         child->priv->fns->location(child, &locations, pcmk__rsc_node_current
2112                                                       |pcmk__rsc_node_pending);
2113         if (locations) {
2114             /* We should never associate the same numbered anonymous clone
2115              * instance with multiple nodes, and clone instances can't migrate,
2116              * so there must be only one location, regardless of history.
2117              */
2118             CRM_LOG_ASSERT(locations->next == NULL);
2119 
2120             if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2121                 /* This child instance is active on the requested node, so check
2122                  * for a corresponding configured resource. We use find_rsc()
2123                  * instead of child because child may be a cloned group, and we
2124                  * need the particular member corresponding to rsc_id.
2125                  *
2126                  * If the history entry is orphaned, rsc will be NULL.
2127                  */
2128                 rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
2129                                                   pcmk_rsc_match_clone_only);
2130                 if (rsc) {
2131                     /* If there are multiple instance history entries for an
2132                      * anonymous clone in a single node's history (which can
2133                      * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2134                      * to false), we want to consider the instances beyond the
2135                      * first as orphans, even if there are inactive instance
2136                      * numbers available.
2137                      */
2138                     if (rsc->priv->active_nodes != NULL) {
2139                         crm_notice("Active (now-)anonymous clone %s has "
2140                                    "multiple (orphan) instance histories on %s",
2141                                    parent->id, pcmk__node_name(node));
2142                         skip_inactive = TRUE;
2143                         rsc = NULL;
2144                     } else {
2145                         pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2146                     }
2147                 }
2148             }
2149             g_list_free(locations);
2150 
2151         } else {
2152             pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2153             if (!skip_inactive && !inactive_instance
2154                 && !pcmk_is_set(child->flags, pcmk__rsc_blocked)) {
2155                 // Remember one inactive instance in case we don't find active
2156                 inactive_instance =
2157                     parent->priv->fns->find_rsc(child, rsc_id, NULL,
2158                                                 pcmk_rsc_match_clone_only);
2159 
2160                 /* ... but don't use it if it was already associated with a
2161                  * pending action on another node
2162                  */
2163                 if (inactive_instance != NULL) {
2164                     const pcmk_node_t *pending_node = NULL;
2165 
2166                     pending_node = inactive_instance->priv->pending_node;
2167                     if ((pending_node != NULL)
2168                         && !pcmk__same_node(pending_node, node)) {
2169                         inactive_instance = NULL;
2170                     }
2171                 }
2172             }
2173         }
2174     }
2175 
2176     if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2177         pcmk__rsc_trace(parent, "Resource %s, empty slot",
2178                         inactive_instance->id);
2179         rsc = inactive_instance;
2180     }
2181 
2182     /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2183      * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2184      * don't want to consume a valid instance number for unclean nodes. Such
2185      * instances may appear to be active according to the history, but should be
2186      * considered inactive, so we can start an instance elsewhere. Treat such
2187      * instances as orphans.
2188      *
2189      * An exception is instances running on guest nodes -- since guest node
2190      * "fencing" is actually just a resource stop, requires shouldn't apply.
2191      *
2192      * @TODO Ideally, we'd use an inactive instance number if it is not needed
2193      * for any clean instances. However, we don't know that at this point.
2194      */
2195     if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)
2196         && (!node->details->online || node->details->unclean)
2197         && !pcmk__is_guest_or_bundle_node(node)
2198         && !pe__is_universal_clone(parent, scheduler)) {
2199 
2200         rsc = NULL;
2201     }
2202 
2203     if (rsc == NULL) {
2204         rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2205         pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2206     }
2207     return rsc;
2208 }
2209 
2210 static pcmk_resource_t *
2211 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2212                      const char *rsc_id)
2213 {
2214     pcmk_resource_t *rsc = NULL;
2215     pcmk_resource_t *parent = NULL;
2216 
2217     crm_trace("looking for %s", rsc_id);
2218     rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2219 
2220     if (rsc == NULL) {
2221         /* If we didn't find the resource by its name in the operation history,
2222          * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2223          * we create a single :0 orphan to match against here.
2224          */
2225         char *clone0_id = clone_zero(rsc_id);
2226         pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
2227                                                    clone0_id);
2228 
2229         if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) {
2230             rsc = clone0;
2231             parent = uber_parent(clone0);
2232             crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2233         } else {
2234             crm_trace("%s is not known as %s either (orphan)",
2235                       rsc_id, clone0_id);
2236         }
2237         free(clone0_id);
2238 
2239     } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
2240         crm_trace("Resource history for %s is orphaned "
2241                   "because it is no longer primitive", rsc_id);
2242         return NULL;
2243 
2244     } else {
2245         parent = uber_parent(rsc);
2246     }
2247 
2248     if (pcmk__is_anonymous_clone(parent)) {
2249 
2250         if (pcmk__is_bundled(parent)) {
2251             rsc = pe__find_bundle_replica(parent->priv->parent, node);
2252         } else {
2253             char *base = clone_strip(rsc_id);
2254 
2255             rsc = find_anonymous_clone(scheduler, node, parent, base);
2256             free(base);
2257             pcmk__assert(rsc != NULL);
2258         }
2259     }
2260 
2261     if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2262         && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
2263 
2264         pcmk__str_update(&(rsc->priv->history_id), rsc_id);
2265         pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2266                         rsc_id, pcmk__node_name(node), rsc->id,
2267                         pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : "");
2268     }
2269     return rsc;
2270 }
2271 
2272 static pcmk_resource_t *
2273 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2274                         pcmk_scheduler_t *scheduler)
2275 {
2276     pcmk_resource_t *rsc = NULL;
2277     const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2278 
2279     crm_debug("Detected orphan resource %s on %s",
2280               rsc_id, pcmk__node_name(node));
2281     rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2282     if (rsc == NULL) {
2283         return NULL;
2284     }
2285 
2286     if (!pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
2287         pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2288 
2289     } else {
2290         CRM_CHECK(rsc != NULL, return NULL);
2291         pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2292         resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2293                           "__orphan_do_not_run__", scheduler);
2294     }
2295     return rsc;
2296 }
2297 
2298 static void
2299 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2300                   enum pcmk__on_fail on_fail)
2301 {
2302     pcmk_node_t *tmpnode = NULL;
2303     char *reason = NULL;
2304     enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
2305     pcmk_scheduler_t *scheduler = NULL;
2306     bool known_active = false;
2307 
2308     pcmk__assert(rsc != NULL);
2309     scheduler = rsc->priv->scheduler;
2310     known_active = (rsc->priv->orig_role > pcmk_role_stopped);
2311     pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2312                     rsc->id, pcmk_role_text(rsc->priv->orig_role),
2313                     pcmk__node_name(node), pcmk__on_fail_text(on_fail));
2314 
2315     /* process current state */
2316     if (rsc->priv->orig_role != pcmk_role_unknown) {
2317         pcmk_resource_t *iter = rsc;
2318 
2319         while (iter) {
2320             if (g_hash_table_lookup(iter->priv->probed_nodes,
2321                                     node->priv->id) == NULL) {
2322                 pcmk_node_t *n = pe__copy_node(node);
2323 
2324                 pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
2325                                 rsc->id,
2326                                 pcmk__s(rsc->priv->history_id, "the same"),
2327                                 pcmk__node_name(n));
2328                 g_hash_table_insert(iter->priv->probed_nodes,
2329                                     (gpointer) n->priv->id, n);
2330             }
2331             if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) {
2332                 break;
2333             }
2334             iter = iter->priv->parent;
2335         }
2336     }
2337 
2338     /* If a managed resource is believed to be running, but node is down ... */
2339     if (known_active && !node->details->online && !node->details->maintenance
2340         && pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
2341 
2342         gboolean should_fence = FALSE;
2343 
2344         /* If this is a guest node, fence it (regardless of whether fencing is
2345          * enabled, because guest node fencing is done by recovery of the
2346          * container resource rather than by the fencer). Mark the resource
2347          * we're processing as failed. When the guest comes back up, its
2348          * operation history in the CIB will be cleared, freeing the affected
2349          * resource to run again once we are sure we know its state.
2350          */
2351         if (pcmk__is_guest_or_bundle_node(node)) {
2352             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2353             should_fence = TRUE;
2354 
2355         } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2356             if (pcmk__is_remote_node(node)
2357                 && (node->priv->remote != NULL)
2358                 && !pcmk_is_set(node->priv->remote->flags,
2359                                 pcmk__rsc_failed)) {
2360 
2361                 /* Setting unseen means that fencing of the remote node will
2362                  * occur only if the connection resource is not going to start
2363                  * somewhere. This allows connection resources on a failed
2364                  * cluster node to move to another node without requiring the
2365                  * remote nodes to be fenced as well.
2366                  */
2367                 pcmk__clear_node_flags(node, pcmk__node_seen);
2368                 reason = crm_strdup_printf("%s is active there (fencing will be"
2369                                            " revoked if remote connection can "
2370                                            "be re-established elsewhere)",
2371                                            rsc->id);
2372             }
2373             should_fence = TRUE;
2374         }
2375 
2376         if (should_fence) {
2377             if (reason == NULL) {
2378                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2379             }
2380             pe_fence_node(scheduler, node, reason, FALSE);
2381         }
2382         free(reason);
2383     }
2384 
2385     /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2386     save_on_fail = on_fail;
2387 
2388     if (node->details->unclean) {
2389         /* No extra processing needed
2390          * Also allows resources to be started again after a node is shot
2391          */
2392         on_fail = pcmk__on_fail_ignore;
2393     }
2394 
2395     switch (on_fail) {
2396         case pcmk__on_fail_ignore:
2397             /* nothing to do */
2398             break;
2399 
2400         case pcmk__on_fail_demote:
2401             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2402             demote_action(rsc, node, FALSE);
2403             break;
2404 
2405         case pcmk__on_fail_fence_node:
2406             /* treat it as if it is still running
2407              * but also mark the node as unclean
2408              */
2409             reason = crm_strdup_printf("%s failed there", rsc->id);
2410             pe_fence_node(scheduler, node, reason, FALSE);
2411             free(reason);
2412             break;
2413 
2414         case pcmk__on_fail_standby_node:
2415             pcmk__set_node_flags(node,
2416                                  pcmk__node_standby|pcmk__node_fail_standby);
2417             break;
2418 
2419         case pcmk__on_fail_block:
2420             /* is_managed == FALSE will prevent any
2421              * actions being sent for the resource
2422              */
2423             pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);
2424             pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked);
2425             break;
2426 
2427         case pcmk__on_fail_ban:
2428             /* make sure it comes up somewhere else
2429              * or not at all
2430              */
2431             resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2432                               "__action_migration_auto__", scheduler);
2433             break;
2434 
2435         case pcmk__on_fail_stop:
2436             pe__set_next_role(rsc, pcmk_role_stopped,
2437                               PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2438             break;
2439 
2440         case pcmk__on_fail_restart:
2441             if (known_active) {
2442                 pcmk__set_rsc_flags(rsc,
2443                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2444                 stop_action(rsc, node, FALSE);
2445             }
2446             break;
2447 
2448         case pcmk__on_fail_restart_container:
2449             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2450             if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
2451                 /* A bundle's remote connection can run on a different node than
2452                  * the bundle's container. We don't necessarily know where the
2453                  * container is running yet, so remember it and add a stop
2454                  * action for it later.
2455                  */
2456                 scheduler->priv->stop_needed =
2457                     g_list_prepend(scheduler->priv->stop_needed,
2458                                    rsc->priv->launcher);
2459             } else if (rsc->priv->launcher != NULL) {
2460                 stop_action(rsc->priv->launcher, node, FALSE);
2461             } else if (known_active) {
2462                 stop_action(rsc, node, FALSE);
2463             }
2464             break;
2465 
2466         case pcmk__on_fail_reset_remote:
2467             pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2468             if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
2469                 tmpnode = NULL;
2470                 if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
2471                     tmpnode = pcmk_find_node(scheduler, rsc->id);
2472                 }
2473                 if (pcmk__is_remote_node(tmpnode)
2474                     && !pcmk_is_set(tmpnode->priv->flags,
2475                                     pcmk__node_remote_fenced)) {
2476                     /* The remote connection resource failed in a way that
2477                      * should result in fencing the remote node.
2478                      */
2479                     pe_fence_node(scheduler, tmpnode,
2480                                   "remote connection is unrecoverable", FALSE);
2481                 }
2482             }
2483 
2484             /* require the stop action regardless of whether fencing is occurring. */
2485             if (known_active) {
2486                 stop_action(rsc, node, FALSE);
2487             }
2488 
2489             /* if reconnect delay is in use, prevent the connection from exiting the
2490              * "STOPPED" role until the failure is cleared by the delay timeout. */
2491             if (rsc->priv->remote_reconnect_ms > 0U) {
2492                 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2493             }
2494             break;
2495     }
2496 
2497     /* Ensure a remote connection failure forces an unclean Pacemaker Remote
2498      * node to be fenced. By marking the node as seen, the failure will result
2499      * in a fencing operation regardless of whether we attempt to reconnect
2500      * in this transition.
2501      */
2502     if (pcmk_all_flags_set(rsc->flags,
2503                            pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) {
2504         tmpnode = pcmk_find_node(scheduler, rsc->id);
2505         if (tmpnode && tmpnode->details->unclean) {
2506             pcmk__set_node_flags(tmpnode, pcmk__node_seen);
2507         }
2508     }
2509 
2510     if (known_active) {
2511         if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
2512             if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
2513                 crm_notice("Removed resource %s is active on %s and will be "
2514                            "stopped when possible",
2515                            rsc->id, pcmk__node_name(node));
2516             } else {
2517                 crm_notice("Removed resource %s must be stopped manually on %s "
2518                            "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
2519                            " is set to false", rsc->id, pcmk__node_name(node));
2520             }
2521         }
2522 
2523         native_add_running(rsc, node, scheduler,
2524                            (save_on_fail != pcmk__on_fail_ignore));
2525         switch (on_fail) {
2526             case pcmk__on_fail_ignore:
2527                 break;
2528             case pcmk__on_fail_demote:
2529             case pcmk__on_fail_block:
2530                 pcmk__set_rsc_flags(rsc, pcmk__rsc_failed);
2531                 break;
2532             default:
2533                 pcmk__set_rsc_flags(rsc,
2534                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
2535                 break;
2536         }
2537 
2538     } else if ((rsc->priv->history_id != NULL)
2539                && (strchr(rsc->priv->history_id, ':') != NULL)) {
2540         /* @COMPAT This is for older (<1.1.8) status sections that included
2541          * instance numbers; otherwise, stopped instances are considered orphans.
2542          *
2543          * @TODO We should be able to drop this, but some old regression tests
2544          * will need to be updated. Double-check that this is not still needed
2545          * for unique clones (which may have been later converted to anonymous).
2546          */
2547         pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
2548                         rsc->priv->history_id, rsc->id);
2549         free(rsc->priv->history_id);
2550         rsc->priv->history_id = NULL;
2551 
2552     } else {
2553         GList *possible_matches = pe__resource_actions(rsc, node,
2554                                                        PCMK_ACTION_STOP, FALSE);
2555         GList *gIter = possible_matches;
2556 
2557         for (; gIter != NULL; gIter = gIter->next) {
2558             pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2559 
2560             pcmk__set_action_flags(stop, pcmk__action_optional);
2561         }
2562 
2563         g_list_free(possible_matches);
2564     }
2565 
2566     /* A successful stop after migrate_to on the migration source doesn't make
2567      * the partially migrated resource stopped on the migration target.
2568      */
2569     if ((rsc->priv->orig_role == pcmk_role_stopped)
2570         && (rsc->priv->active_nodes != NULL)
2571         && (rsc->priv->partial_migration_target != NULL)
2572         && pcmk__same_node(rsc->priv->partial_migration_source, node)) {
2573 
2574         rsc->priv->orig_role = pcmk_role_started;
2575     }
2576 }
2577 
2578 /* create active recurring operations as optional */
2579 static void
2580 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2581                   int start_index, int stop_index,
2582                   GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2583 {
2584     int counter = -1;
2585     const char *task = NULL;
2586     const char *status = NULL;
2587     GList *gIter = sorted_op_list;
2588 
2589     pcmk__assert(rsc != NULL);
2590     pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2591                     rsc->id, start_index, stop_index);
2592 
2593     for (; gIter != NULL; gIter = gIter->next) {
2594         xmlNode *rsc_op = (xmlNode *) gIter->data;
2595 
2596         guint interval_ms = 0;
2597         char *key = NULL;
2598         const char *id = pcmk__xe_id(rsc_op);
2599 
2600         counter++;
2601 
2602         if (node->details->online == FALSE) {
2603             pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2604                             rsc->id, pcmk__node_name(node));
2605             break;
2606 
2607             /* Need to check if there's a monitor for role="Stopped" */
2608         } else if (start_index < stop_index && counter <= stop_index) {
2609             pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2610                             id, pcmk__node_name(node));
2611             continue;
2612 
2613         } else if (counter < start_index) {
2614             pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2615                             id, pcmk__node_name(node), counter);
2616             continue;
2617         }
2618 
2619         crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2620         if (interval_ms == 0) {
2621             pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2622                             id, pcmk__node_name(node));
2623             continue;
2624         }
2625 
2626         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2627         if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2628             pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2629                             id, pcmk__node_name(node));
2630             continue;
2631         }
2632         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2633         /* create the action */
2634         key = pcmk__op_key(rsc->id, task, interval_ms);
2635         pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2636         custom_action(rsc, key, task, node, TRUE, scheduler);
2637     }
2638 }
2639 
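     /*!
      * \internal
      * \brief Find the latest successful stop and start in sorted action history
      *
      * If no explicit start or migrate_from entry is found, a promote, demote,
      * or successful monitor result may be used to imply a start.
      *
      * \param[in]  sorted_op_list  Action history entries, sorted by call ID
      * \param[out] start_index     Where to store index of latest start
      * \param[out] stop_index      Where to store index of latest successful stop
      */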
2640 void
2641 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2642                      int *stop_index)
2643 {
2644     int counter = -1;
2645     int implied_monitor_start = -1;
2646     int implied_clone_start = -1;
2647     const char *task = NULL;
2648     const char *status = NULL;
2649 
2650     *stop_index = -1;
2651     *start_index = -1;
2652 
2653     for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2654         const xmlNode *rsc_op = (const xmlNode *) iter->data;
2655 
2656         counter++;
2657 
2658         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2659         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2660 
2661         if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2662             && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2663             *stop_index = counter;
2664 
2665         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2666                                         PCMK_ACTION_MIGRATE_FROM, NULL)) {
2667             *start_index = counter;
2668 
2669         } else if ((implied_monitor_start <= *stop_index)
2670                    && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2671                                    pcmk__str_casei)) {
2672             const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2673 
2674             if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2675                 implied_monitor_start = counter;
2676             }
2677         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2678                                         PCMK_ACTION_DEMOTE, NULL)) {
2679             implied_clone_start = counter;
2680         }
2681     }
2682 
2683     if (*start_index == -1) {
2684         if (implied_clone_start != -1) {
2685             *start_index = implied_clone_start;
2686         } else if (implied_monitor_start != -1) {
2687             *start_index = implied_monitor_start;
2688         }
2689     }
2690 }
2691 
2692 // If resource history entry has shutdown lock, remember lock node and time
2693 static void
2694 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2695                      const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2696 {
2697     time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
2698 
2699     if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
2700                                  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2701 
2702         if ((scheduler->priv->shutdown_lock_ms > 0U)
2703             && (pcmk__scheduler_epoch_time(scheduler)
2704                 > (lock_time + pcmk__timeout_ms2s(scheduler->priv->shutdown_lock_ms)))) {
2705             pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2706                            rsc->id, pcmk__node_name(node));
2707             pe__clear_resource_history(rsc, node);
2708         } else {
2709             rsc->priv->lock_node = node;
2710             rsc->priv->lock_time = lock_time;
2711         }
2712     }
2713 }
2714 
2715 /*!
2716  * \internal
2717  * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2718  *
2719  * \param[in,out] node       Node whose status is being unpacked
2720  * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2721  * \param[in,out] scheduler  Scheduler data
2722  *
2723  * \return Resource corresponding to the entry, or NULL if no operation history
2724  */
2725 static pcmk_resource_t *
2726 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2727                     pcmk_scheduler_t *scheduler)
2728 {
2729     GList *gIter = NULL;
2730     int stop_index = -1;
2731     int start_index = -1;
2732     enum rsc_role_e req_role = pcmk_role_unknown;
2733 
2734     const char *rsc_id = pcmk__xe_id(lrm_resource);
2735 
2736     pcmk_resource_t *rsc = NULL;
2737     GList *op_list = NULL;
2738     GList *sorted_op_list = NULL;
2739 
2740     xmlNode *rsc_op = NULL;
2741     xmlNode *last_failure = NULL;
2742 
2743     enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
2744     enum rsc_role_e saved_role = pcmk_role_unknown;
2745 
2746     if (rsc_id == NULL) {
2747         pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2748                          " entry: No " PCMK_XA_ID);
2749         crm_log_xml_info(lrm_resource, "missing-id");
2750         return NULL;
2751     }
2752     crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2753               rsc_id, pcmk__node_name(node));
2754 
2755     /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2756      * them
2757      */
2758     for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2759                                        NULL);
2760          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
2761 
2762         op_list = g_list_prepend(op_list, rsc_op);
2763     }
2764 
2765     if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2766         if (op_list == NULL) {
2767             // If there are no operations, there is nothing to do
2768             return NULL;
2769         }
2770     }
2771 
2772     /* find the resource */
2773     rsc = unpack_find_resource(scheduler, node, rsc_id);
2774     if (rsc == NULL) {
2775         if (op_list == NULL) {
2776             // If there are no operations, there is nothing to do
2777             return NULL;
2778         } else {
2779             rsc = process_orphan_resource(lrm_resource, node, scheduler);
2780         }
2781     }
2782     pcmk__assert(rsc != NULL);
2783 
2784     // Check whether the resource is "shutdown-locked" to this node
2785     if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) {
2786         unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2787     }
2788 
2789     /* process operations */
2790     saved_role = rsc->priv->orig_role;
2791     rsc->priv->orig_role = pcmk_role_unknown;
2792     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2793 
2794     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2795         xmlNode *rsc_op = (xmlNode *) gIter->data;
2796 
2797         unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2798     }
2799 
2800     /* create active recurring operations as optional */
2801     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2802     process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2803                       scheduler);
2804 
2805     /* no need to free the contents */
2806     g_list_free(sorted_op_list);
2807 
2808     process_rsc_state(rsc, node, on_fail);
2809 
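         /* Apply any configured target role, but only to set the next role when
          * it is not yet calculated or to lower it, never to raise it
          */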
2810     if (get_target_role(rsc, &req_role)) {
2811         if ((rsc->priv->next_role == pcmk_role_unknown)
2812             || (req_role < rsc->priv->next_role)) {
2813 
2814             pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2815 
2816         } else if (req_role > rsc->priv->next_role) {
2817             pcmk__rsc_info(rsc,
2818                            "%s: Not overwriting calculated next role %s"
2819                            " with requested next role %s",
2820                            rsc->id, pcmk_role_text(rsc->priv->next_role),
2821                            pcmk_role_text(req_role));
2822         }
2823     }
2824 
2825     if (saved_role > rsc->priv->orig_role) {
2826         rsc->priv->orig_role = saved_role;
2827     }
2828 
2829     return rsc;
2830 }
2831 
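     /*!
      * \internal
      * \brief Map removed launched resources to their launchers
      *
      * For each resource history entry that names a container, if the removed
      * launched resource it refers to has no launcher assigned yet, point it at
      * the launcher resource and add it to the launcher's launched list.
      *
      * \param[in]     lrm_rsc_list  \c PCMK__XE_LRM_RESOURCES XML to check
      * \param[in,out] scheduler     Scheduler data
      */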
2832 static void
2833 handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
2834                                   pcmk_scheduler_t *scheduler)
2835 {
2836     for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
2837                                                          PCMK__XE_LRM_RESOURCE,
2838                                                          NULL, NULL);
2839          rsc_entry != NULL;
2840          rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2841 
2842         pcmk_resource_t *rsc;
2843         pcmk_resource_t *launcher = NULL;
2844         const char *rsc_id;
2845         const char *launcher_id = NULL;
2846 
2847         launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2848         rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2849         if ((launcher_id == NULL) || (rsc_id == NULL)) {
2850             continue;
2851         }
2852 
2853         launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
2854         if (launcher == NULL) {
2855             continue;
2856         }
2857 
2858         rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2859         if ((rsc == NULL) || (rsc->priv->launcher != NULL)
2860             || !pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2861             continue;
2862         }
2863 
2864         pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
2865                         rsc->id, launcher_id);
2866         rsc->priv->launcher = launcher;
2867         launcher->priv->launched = g_list_append(launcher->priv->launched,
2868                                                     rsc);
2869     }
2870 }
2871 
2872 /*!
2873  * \internal
2874  * \brief Unpack one node's lrm status section
2875  *
2876  * \param[in,out] node       Node whose status is being unpacked
2877  * \param[in]     xml        CIB node state XML
2878  * \param[in,out] scheduler  Scheduler data
2879  */
2880 static void
2881 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2882                 pcmk_scheduler_t *scheduler)
2883 {
2884     bool found_removed_launched_resource = false;
2885 
2886     // Drill down to PCMK__XE_LRM_RESOURCES section
2887     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2888     if (xml == NULL) {
2889         return;
2890     }
2891     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2892     if (xml == NULL) {
2893         return;
2894     }
2895 
2896     // Unpack each PCMK__XE_LRM_RESOURCE entry
2897     for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2898                                                          PCMK__XE_LRM_RESOURCE,
2899                                                          NULL, NULL);
2900          rsc_entry != NULL;
2901          rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2902 
2903         pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2904 
2905         if ((rsc != NULL)
2906             && pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) {
2907             found_removed_launched_resource = true;
2908         }
2909     }
2910 
2911     /* Now that all resource state has been unpacked for this node, map any
2912      * removed launched resources to their launchers.
2913      */
2914     if (found_removed_launched_resource) {
2915         handle_removed_launched_resources(xml, scheduler);
2916     }
2917 }
2918 
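     /*!
      * \internal
      * \brief Mark a resource as active in its appropriate role
      *
      * Set the resource's known role to unpromoted if its top-level parent is
      * promotable, otherwise to started.
      *
      * \param[in,out] rsc  Resource to mark as active
      */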
2919 static void
2920 set_active(pcmk_resource_t *rsc)
2921 {
2922     const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2923 
2924     if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) {
2925         rsc->priv->orig_role = pcmk_role_unpromoted;
2926     } else {
2927         rsc->priv->orig_role = pcmk_role_started;
2928     }
2929 }
2930 
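     /*!
      * \internal
      * \brief Set a node's assignment score (GHashTable iteration helper)
      *
      * \param[in]     key        Ignored
      * \param[in,out] value      Node whose score should be set
      * \param[in]     user_data  Score to assign (as int *)
      */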
2931 static void
2932 set_node_score(gpointer key, gpointer value, gpointer user_data)
2933 {
2934     pcmk_node_t *node = value;
2935     int *score = user_data;
2936 
2937     node->assign->score = *score;
2938 }
2939 
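     // XPath fragments for finding resource action history in the CIB status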
2940 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2941                          "/" PCMK__XE_NODE_STATE
2942 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM             \
2943                                "/" PCMK__XE_LRM_RESOURCES   \
2944                                "/" PCMK__XE_LRM_RESOURCE
2945 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2946 
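     /*!
      * \internal
      * \brief Find a resource's action history entry in the CIB status section
      *
      * \param[in]     resource   ID of resource whose history to search
      * \param[in]     op         Name of action to search for
      * \param[in]     node       Name of node to search on
      * \param[in]     source     If not NULL for a migration action, the other
      *                           node recorded in the entry (the target for
      *                           migrate_to, or the source for migrate_from)
      * \param[in]     target_rc  If non-negative, entry must have this rc-code
      *                           and a completed (done) operation status
      * \param[in,out] scheduler  Scheduler data
      *
      * \return Matching \c PCMK__XE_LRM_RSC_OP XML, or NULL if none
      */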
2947 static xmlNode *
2948 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2949             int target_rc, pcmk_scheduler_t *scheduler)
2950 {
2951     GString *xpath = NULL;
2952     xmlNode *xml = NULL;
2953 
2954     CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2955               return NULL);
2956 
2957     xpath = g_string_sized_new(256);
2958     pcmk__g_strcat(xpath,
2959                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2960                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2961                    SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2962                    NULL);
2963 
2964     /* Need to check against transition_magic too? */
2965     if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2966         pcmk__g_strcat(xpath,
2967                        " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2968                        NULL);
2969 
2970     } else if ((source != NULL)
2971                && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2972         pcmk__g_strcat(xpath,
2973                        " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2974                        NULL);
2975     } else {
2976         g_string_append_c(xpath, ']');
2977     }
2978 
2979     xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
2980     g_string_free(xpath, TRUE);
2981 
2982     if (xml && target_rc >= 0) {
2983         int rc = PCMK_OCF_UNKNOWN_ERROR;
2984         int status = PCMK_EXEC_ERROR;
2985 
2986         crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
2987         crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
2988         if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2989             return NULL;
2990         }
2991     }
2992     return xml;
2993 }
2994 
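     /*!
      * \internal
      * \brief Find a resource's history section for a node in the CIB status
      *
      * \param[in]     rsc_id     ID of resource whose history to search
      * \param[in]     node_name  Name of node to search on
      * \param[in,out] scheduler  Scheduler data
      *
      * \return Matching \c PCMK__XE_LRM_RESOURCE XML, or NULL if none
      */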
2995 static xmlNode *
2996 find_lrm_resource(const char *rsc_id, const char *node_name,
2997                   pcmk_scheduler_t *scheduler)
2998 {
2999     GString *xpath = NULL;
3000     xmlNode *xml = NULL;
3001 
3002     CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
3003 
3004     xpath = g_string_sized_new(256);
3005     pcmk__g_strcat(xpath,
3006                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
3007                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
3008                    NULL);
3009 
3010     xml = pcmk__xpath_find_one(scheduler->input->doc, xpath->str, LOG_DEBUG);
3011 
3012     g_string_free(xpath, TRUE);
3013     return xml;
3014 }
3015 
3016 /*!
3017  * \internal
3018  * \brief Check whether a resource has no completed action history on a node
3019  *
3020  * \param[in,out] rsc        Resource to check
3021  * \param[in]     node_name  Node to check
3022  *
3023  * \return true if \p rsc is unknown on \p node_name, otherwise false
3024  */
3025 static bool
3026 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3027 {
3028     bool result = false;
3029     xmlXPathObject *search;
3030     char *xpath = NULL;
3031 
3032     xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3033                               SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
3034                               SUB_XPATH_LRM_RSC_OP
3035                               "[@" PCMK__XA_RC_CODE "!='%d']",
3036                               node_name, rsc->id, PCMK_OCF_UNKNOWN);
3037 
3038     search = pcmk__xpath_search(rsc->priv->scheduler->input->doc, xpath);
3039     result = (pcmk__xpath_num_results(search) == 0);
3040     xmlXPathFreeObject(search);
3041     free(xpath);
3042     return result;
3043 }
3044 
3045 /*!
3046  * \internal
3047  * \brief Check whether a probe/monitor indicating the resource was not running
3048  *        on a node happened after some event
3049  *
3050  * \param[in]     rsc_id     Resource being checked
3051  * \param[in]     node_name  Node being checked
3052  * \param[in]     xml_op     Event that monitor is being compared to
3053  * \param[in,out] scheduler  Scheduler data
3054  *
3055  * \return true if such a monitor happened after the event, false otherwise
3056  */
3057 static bool
3058 monitor_not_running_after(const char *rsc_id, const char *node_name,
3059                           const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3060 {
3061     /* Any probe/monitor operation on the node indicating it was not running
3062      * there
3063      */
3064     xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3065                                    NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3066 
3067     return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
3068 }
3069 
3070 /*!
3071  * \internal
3072  * \brief Check whether any non-monitor operation on a node happened after some
3073  *        event
3074  *
3075  * \param[in]     rsc_id     Resource being checked
3076  * \param[in]     node_name  Node being checked
3077  * \param[in]     xml_op     Event that non-monitor is being compared to
3078  * \param[in,out] scheduler  Scheduler data
3079  *
3080  * \return true if such an operation happened after the event, false otherwise
3081  */
3082 static bool
3083 non_monitor_after(const char *rsc_id, const char *node_name,
3084                   const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3085 {
3086     xmlNode *lrm_resource = NULL;
3087 
3088     lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3089     if (lrm_resource == NULL) {
3090         return false;
3091     }
3092 
3093     for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3094                                             NULL, NULL);
3095          op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {
3096 
3097         const char *task = NULL;
3098 
3099         if (op == xml_op) {
3100             continue;
3101         }
3102 
3103         task = crm_element_value(op, PCMK_XA_OPERATION);
3104 
3105         if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3106                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3107                              NULL)
3108             && pe__is_newer_op(op, xml_op) > 0) {
3109             return true;
3110         }
3111     }
3112 
3113     return false;
3114 }
3115 
3116 /*!
3117  * \internal
3118  * \brief Check whether the resource has newer state on a node after a migration
3119  *        attempt
3120  *
3121  * \param[in]     rsc_id        Resource being checked
3122  * \param[in]     node_name     Node being checked
3123  * \param[in]     migrate_to    Any migrate_to event that is being compared to
3124  * \param[in]     migrate_from  Any migrate_from event that is being compared to
3125  * \param[in,out] scheduler     Scheduler data
3126  *
3127  * \return true if the resource has newer state on the node after the migration, false otherwise
3128  */
3129 static bool
3130 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3131                           const xmlNode *migrate_to,
3132                           const xmlNode *migrate_from,
3133                           pcmk_scheduler_t *scheduler)
3134 {
3135     const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
3136     const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
3137 
3138     /* Prefer comparing to the migration event on the same node, if one
3139      * exists, since call IDs are more reliable.
3140      */
3141     if ((xml_op != migrate_to) && (migrate_to != NULL)
3142         && pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3143 
3144         xml_op = migrate_to;
3145     }
3146 
3147     /* If there's any newer non-monitor operation on the node, or any newer
3148      * probe/monitor operation on the node indicating it was not running there,
3149      * the migration events potentially no longer matter for the node.
3150      */
3151     return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
3152            || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
3153 }
3154 
3155 /*!
3156  * \internal
3157  * \brief Parse migration source and target node names from history entry
3158  *
3159  * \param[in]  entry        Resource history entry for a migration action
3160  * \param[in]  source_node  If not NULL, source must match this node
3161  * \param[in]  target_node  If not NULL, target must match this node
3162  * \param[out] source_name  Where to store migration source node name
3163  * \param[out] target_name  Where to store migration target node name
3164  *
3165  * \return Standard Pacemaker return code
3166  */
3167 static int
3168 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3169                          const pcmk_node_t *target_node,
3170                          const char **source_name, const char **target_name)
3171 {
3172     *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3173     *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3174     if ((*source_name == NULL) || (*target_name == NULL)) {
3175         pcmk__config_err("Ignoring resource history entry %s without "
3176                          PCMK__META_MIGRATE_SOURCE " and "
3177                          PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3178         return pcmk_rc_unpack_error;
3179     }
3180 
3181     if ((source_node != NULL)
3182         && !pcmk__str_eq(*source_name, source_node->priv->name,
3183                          pcmk__str_casei|pcmk__str_null_matches)) {
3184         pcmk__config_err("Ignoring resource history entry %s because "
3185                          PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3186                          pcmk__xe_id(entry), *source_name,
3187                          pcmk__node_name(source_node));
3188         return pcmk_rc_unpack_error;
3189     }
3190 
3191     if ((target_node != NULL)
3192         && !pcmk__str_eq(*target_name, target_node->priv->name,
3193                          pcmk__str_casei|pcmk__str_null_matches)) {
3194         pcmk__config_err("Ignoring resource history entry %s because "
3195                          PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3196                          pcmk__xe_id(entry), *target_name,
3197                          pcmk__node_name(target_node));
3198         return pcmk_rc_unpack_error;
3199     }
3200 
3201     return pcmk_rc_ok;
3202 }
3203 
3204 /*!
3205  * \internal
3206  * \brief Add a migration source to a resource's list of dangling migrations
3207  *
3208  * If the migrate_to and migrate_from actions in a live migration both
3209  * succeeded, but there is no stop on the source, the migration is considered
3210  * "dangling." Add the source to the resource's dangling migration list, which
3211  * will be used to schedule a stop on the source without affecting the target.
3212  *
3213  * \param[in,out] rsc   Resource involved in migration
3214  * \param[in]     node  Migration source
3215  */
3216 static void
3217 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3218 {
3219     pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3220                     rsc->id, pcmk__node_name(node));
3221     rsc->priv->orig_role = pcmk_role_stopped;
3222     rsc->priv->dangling_migration_sources =
3223         g_list_prepend(rsc->priv->dangling_migration_sources,
3224                        (gpointer) node);
3225 }
3226 
3227 /*!
3228  * \internal
3229  * \brief Update resource role etc. after a successful migrate_to action
3230  *
3231  * \param[in,out] history  Parsed action result history
3232  */
3233 static void
3234 unpack_migrate_to_success(struct action_history *history)
3235 {
3236     /* A complete migration sequence is:
3237      * 1. migrate_to on source node (which succeeded if we get to this function)
3238      * 2. migrate_from on target node
3239      * 3. stop on source node
3240      *
3241      * If no migrate_from has happened, the migration is considered to be
3242      * "partial". If the migrate_from succeeded but no stop has happened, the
3243      * migration is considered to be "dangling".
3244      *
3245      * If a successful migrate_to and stop have happened on the source node, we
3246      * still need to check for a partial migration, due to scenarios (easier to
3247      * produce with batch-limit=1) like:
3248      *
3249      * - A resource is migrating from node1 to node2, and a migrate_to is
3250      *   initiated for it on node1.
3251      *
3252      * - node2 goes into standby mode while the migrate_to is pending, which
3253      *   aborts the transition.
3254      *
3255      * - Upon completion of the migrate_to, a new transition schedules a stop
3256      *   on both nodes and a start on node1.
3257      *
3258      * - If the new transition is aborted for any reason while the resource is
3259      *   stopping on node1, the transition after that stop completes will see
3260      *   the migrate_to and stop on the source, but it's still a partial
3261      *   migration, and the resource must be stopped on node2 because it is
3262      *   potentially active there due to the migrate_to.
3263      *
3264      *   We also need to take into account that either node's history may be
3265      *   cleared at any point in the migration process.
3266      */
3267     int from_rc = PCMK_OCF_OK;
3268     int from_status = PCMK_EXEC_PENDING;
3269     pcmk_node_t *target_node = NULL;
3270     xmlNode *migrate_from = NULL;
3271     const char *source = NULL;
3272     const char *target = NULL;
3273     bool source_newer_op = false;
3274     bool target_newer_state = false;
3275     bool active_on_target = false;
3276     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3277 
3278     // Get source and target node names from XML
3279     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3280                                  &target) != pcmk_rc_ok) {
3281         return;
3282     }
3283 
3284     // Check for newer state on the source
3285     source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3286                                         scheduler);
3287 
3288     // Check for a migrate_from action from this source on the target
3289     migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3290                                target, source, -1, scheduler);
3291     if (migrate_from != NULL) {
3292         if (source_newer_op) {
3293             /* There's a newer non-monitor operation on the source and a
3294              * migrate_from on the target, so this migrate_to is irrelevant to
3295              * the resource's state.
3296              */
3297             return;
3298         }
3299         crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3300         crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3301     }
3302 
3303     /* If the resource has newer state on both the source and target after the
3304      * migration events, this migrate_to is irrelevant to the resource's state.
3305      */
3306     target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3307                                                    history->xml, migrate_from,
3308                                                    scheduler);
3309     if (source_newer_op && target_newer_state) {
3310         return;
3311     }
3312 
3313     /* Check for dangling migration (migrate_from succeeded but stop not done).
3314      * We know there's no stop because we already returned if the target has a
3315      * migrate_from and the source has any newer non-monitor operation.
3316      */
3317     if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3318         add_dangling_migration(history->rsc, history->node);
3319         return;
3320     }
3321 
3322     /* Without newer state, this migrate_to implies the resource is active.
3323      * (Clones are not allowed to migrate, so role can't be promoted.)
3324      */
3325     history->rsc->priv->orig_role = pcmk_role_started;
3326 
3327     target_node = pcmk_find_node(scheduler, target);
3328     active_on_target = !target_newer_state && (target_node != NULL)
3329                        && target_node->details->online;
3330 
3331     if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3332         if (active_on_target) {
3333             native_add_running(history->rsc, target_node, scheduler, TRUE);
3334         } else {
3335             // Mark resource as failed, require recovery, and prevent migration
3336             pcmk__set_rsc_flags(history->rsc,
3337                                 pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3338             pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3339         }
3340         return;
3341     }
3342 
3343     // The migrate_from is pending, complete but erased, or to be scheduled
3344 
3345     /* If there is no history at all for the resource on an online target, then
3346      * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3347      * have the probe result, it will be reflected in target_newer_state.
3348      */
3349     if ((target_node != NULL) && target_node->details->online
3350         && unknown_on_node(history->rsc, target)) {
3351         return;
3352     }
3353 
3354     if (active_on_target) {
3355         pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3356 
3357         native_add_running(history->rsc, target_node, scheduler, FALSE);
3358         if ((source_node != NULL) && source_node->details->online) {
3359             /* This is a partial migration: the migrate_to completed
3360              * successfully on the source, but the migrate_from has not
3361              * completed. Remember the source and target; if the newly
3362              * chosen target remains the same when we schedule actions
3363              * later, we may continue with the migration.
3364              */
3365             history->rsc->priv->partial_migration_target = target_node;
3366             history->rsc->priv->partial_migration_source = source_node;
3367         }
3368 
3369     } else if (!source_newer_op) {
3370         // Mark resource as failed, require recovery, and prevent migration
3371         pcmk__set_rsc_flags(history->rsc,
3372                             pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
3373         pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable);
3374     }
3375 }
3376 
3377 /*!
3378  * \internal
3379  * \brief Update resource role etc. after a failed migrate_to action
3380  *
3381  * \param[in,out] history  Parsed action result history
3382  */
3383 static void
3384 unpack_migrate_to_failure(struct action_history *history)
3385 {
3386     xmlNode *target_migrate_from = NULL;
3387     const char *source = NULL;
3388     const char *target = NULL;
3389     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3390 
3391     // Get source and target node names from XML
3392     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3393                                  &target) != pcmk_rc_ok) {
3394         return;
3395     }
3396 
3397     /* If a migration failed, we have to assume the resource is active. Clones
3398      * are not allowed to migrate, so role can't be promoted.
3399      */
3400     history->rsc->priv->orig_role = pcmk_role_started;
3401 
3402     // Check for migrate_from on the target
3403     target_migrate_from = find_lrm_op(history->rsc->id,
3404                                       PCMK_ACTION_MIGRATE_FROM, target, source,
3405                                       PCMK_OCF_OK, scheduler);
3406 
3407     if (/* If the resource state is unknown on the target, it will likely be
3408          * probed there.
3409          * Don't just consider it running there. We will get back here anyway in
3410          * case the probe detects it's running there.
3411          */
3412         !unknown_on_node(history->rsc, target)
3413         /* If the resource has newer state on the target after the migration
3414          * events, this migrate_to no longer matters for the target.
3415          */
3416         && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3417                                       target_migrate_from, scheduler)) {
3418         /* The resource has no newer state on the target, so assume it's still
3419          * active there (if it is up).
3420          */
3422         pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3423 
3424         if (target_node && target_node->details->online) {
3425             native_add_running(history->rsc, target_node, scheduler, FALSE);
3426         }
3427 
3428     } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3429                                   scheduler)) {
3430         /* We know the resource has newer state on the target, but this
3431          * migrate_to still matters for the source as long as there's no newer
3432          * non-monitor operation there.
3433          */
3434 
3435         // Mark node as having dangling migration so we can force a stop later
3436         history->rsc->priv->dangling_migration_sources =
3437             g_list_prepend(history->rsc->priv->dangling_migration_sources,
3438                            (gpointer) history->node);
3439     }
3440 }
3441 
3442 /*!
3443  * \internal
3444  * \brief Update resource role etc. after a failed migrate_from action
3445  *
3446  * \param[in,out] history  Parsed action result history
3447  */
3448 static void
3449 unpack_migrate_from_failure(struct action_history *history)
3450 {
3451     xmlNode *source_migrate_to = NULL;
3452     const char *source = NULL;
3453     const char *target = NULL;
3454     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3455 
3456     // Get source and target node names from XML
3457     if (get_migration_node_names(history->xml, NULL, history->node, &source,
3458                                  &target) != pcmk_rc_ok) {
3459         return;
3460     }
3461 
3462     /* If a migration failed, we have to assume the resource is active. Clones
3463      * are not allowed to migrate, so role can't be promoted.
3464      */
3465     history->rsc->priv->orig_role = pcmk_role_started;
3466 
3467     // Check for a migrate_to on the source
3468     source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3469                                     source, target, PCMK_OCF_OK, scheduler);
3470 
3471     if (/* If the resource state is unknown on the source, it will likely be
3472          * probed there.
3473          * Don't just consider it running there. We will get back here anyway in
3474          * case the probe detects it's running there.
3475          */
3476         !unknown_on_node(history->rsc, source)
3477         /* If the resource has newer state on the source after the migration
3478          * events, this migrate_from no longer matters for the source.
3479          */
3480         && !newer_state_after_migrate(history->rsc->id, source,
3481                                       source_migrate_to, history->xml,
3482                                       scheduler)) {
3483         /* The resource has no newer state on the source, so assume it's still
3484          * active there (if it is up).
3485          */
3486         pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3487 
3488         if (source_node && source_node->details->online) {
3489             native_add_running(history->rsc, source_node, scheduler, TRUE);
3490         }
3491     }
3492 }
3493 
3494 /*!
3495  * \internal
3496  * \brief Add an action to cluster's list of failed actions
3497  *
3498  * \param[in,out] history  Parsed action result history
3499  */
3500 static void
3501 record_failed_op(struct action_history *history)
3502 {
3503     const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3504 
3505     if (!(history->node->details->online)) {
3506         return;
3507     }
3508 
3509     for (const xmlNode *xIter = scheduler->priv->failed->children;
3510          xIter != NULL; xIter = xIter->next) {
3511 
3512         const char *key = pcmk__xe_history_key(xIter);
3513         const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3514 
3515         if (pcmk__str_eq(history->key, key, pcmk__str_none)
3516             && pcmk__str_eq(uname, history->node->priv->name,
3517                             pcmk__str_casei)) {
3518             crm_trace("Skipping duplicate entry %s on %s",
3519                       history->key, pcmk__node_name(history->node));
3520             return;
3521         }
3522     }
3523 
3524     crm_trace("Adding entry for %s on %s to failed action list",
3525               history->key, pcmk__node_name(history->node));
3526     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3527     crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3528     pcmk__xml_copy(scheduler->priv->failed, history->xml);
3529 }
3530 
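     /*!
      * \internal
      * \brief Get a displayable string for an action's last change time
      *
      * \param[in] xml_op  Action history entry XML
      *
      * \return Newly allocated string with the entry's last-rc-change time (with
      *         the day of week omitted), or "unknown_time" if unavailable
      *
      * \note The caller is responsible for freeing the return value.
      */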
3531 static char *
3532 last_change_str(const xmlNode *xml_op)
3533 {
3534     time_t when;
3535     char *result = NULL;
3536 
3537     if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
3538                                 &when) == pcmk_ok) {
3539         char *when_s = pcmk__epoch2str(&when, 0);
3540         const char *p = strchr(when_s, ' ');
3541 
3542         // Skip day of week to make message shorter
3543         if ((p != NULL) && (*(++p) != '\0')) {
3544             result = pcmk__str_copy(p);
3545         }
3546         free(when_s);
3547     }
3548 
3549     if (result == NULL) {
3550         result = pcmk__str_copy("unknown_time");
3551     }
3552 
3553     return result;
3554 }
3555 
3556 /*!
3557  * \internal
3558  * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3559  *
3560  * \param[in,out] rsc  Resource to ban
3561  */
3562 static void
3563 ban_from_all_nodes(pcmk_resource_t *rsc)
3564 {
3565     int score = -PCMK_SCORE_INFINITY;
3566     const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3567 
3568     if (rsc->priv->parent != NULL) {
3569         pcmk_resource_t *parent = uber_parent(rsc);
3570 
3571         if (pcmk__is_anonymous_clone(parent)) {
3572             /* For anonymous clones, if an operation with
3573              * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3574              * entire clone must stop.
3575              */
3576             rsc = parent;
3577         }
3578     }
3579 
3580     // Ban the resource from all nodes
3581     crm_notice("%s will not be started under current conditions", rsc->id);
3582     if (rsc->priv->allowed_nodes != NULL) {
3583         g_hash_table_destroy(rsc->priv->allowed_nodes);
3584     }
3585     rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes);
3586     g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3587 }
3588 
3589 /*!
3590  * \internal
3591  * \brief Get configured failure handling and role after failure for an action
3592  *
3593  * \param[in,out] history    Unpacked action history entry
3594  * \param[out]    on_fail    Where to set configured failure handling
3595  * \param[out]    fail_role  Where to set to role after failure
3596  */
3597 static void
3598 unpack_failure_handling(struct action_history *history,
3599                         enum pcmk__on_fail *on_fail,
3600                         enum rsc_role_e *fail_role)
3601 {
3602     xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3603                                                history->interval_ms, true);
3604 
3605     GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3606                                                 history->task,
3607                                                 history->interval_ms, config);
3608 
3609     const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3610 
3611     *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3612                                    history->interval_ms, on_fail_str);
3613     *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3614                                           meta);
3615     g_hash_table_destroy(meta);
3616 }
3617 
3618 /*!
3619  * \internal
3620  * \brief Update resource role, failure handling, etc., after a failed action
3621  *
3622  * \param[in,out] history         Parsed action result history
3623  * \param[in]     config_on_fail  Action failure handling from configuration
3624  * \param[in]     fail_role       Resource's role after failure of this action
3625  * \param[out]    last_failure    This will be set to the history XML
3626  * \param[in,out] on_fail         Actual handling of action result
3627  */
3628 static void
3629 unpack_rsc_op_failure(struct action_history *history,
3630                       enum pcmk__on_fail config_on_fail,
3631                       enum rsc_role_e fail_role, xmlNode **last_failure,
3632                       enum pcmk__on_fail *on_fail)
3633 {
3634     bool is_probe = false;
3635     char *last_change_s = NULL;
3636     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3637 
3638     *last_failure = history->xml;
3639 
3640     is_probe = pcmk_xe_is_probe(history->xml);
3641     last_change_s = last_change_str(history->xml);
3642 
3643     if (!pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)
3644         && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3645         crm_trace("Unexpected result (%s%s%s) was recorded for "
3646                   "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3647                   crm_exit_str(history->exit_status),
3648                   (pcmk__str_empty(history->exit_reason)? "" : ": "),
3649                   pcmk__s(history->exit_reason, ""),
3650                   (is_probe? "probe" : history->task), history->rsc->id,
3651                   pcmk__node_name(history->node), last_change_s,
3652                   history->exit_status, history->id);
3653     } else {
3654         pcmk__sched_warn(scheduler,
3655                          "Unexpected result (%s%s%s) was recorded for %s of "
3656                          "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3657                          crm_exit_str(history->exit_status),
3658                          (pcmk__str_empty(history->exit_reason)? "" : ": "),
3659                          pcmk__s(history->exit_reason, ""),
3660                          (is_probe? "probe" : history->task), history->rsc->id,
3661                          pcmk__node_name(history->node), last_change_s,
3662                          history->exit_status, history->id);
3663 
3664         if (is_probe && (history->exit_status != PCMK_OCF_OK)
3665             && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3666             && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3667 
3668             /* A failed (not just unexpected) probe result could mean the user
3669              * didn't know resources will be probed even where they can't run.
3670              */
3671             crm_notice("If it is not possible for %s to run on %s, see "
3672                        "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3673                        "constraints",
3674                        history->rsc->id, pcmk__node_name(history->node));
3675         }
3676 
3677         record_failed_op(history);
3678     }
3679 
3680     free(last_change_s);
3681 
3682     if (*on_fail < config_on_fail) {
3683         pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3684                         pcmk__on_fail_text(*on_fail),
3685                         pcmk__on_fail_text(config_on_fail), history->key);
3686         *on_fail = config_on_fail;
3687     }
3688 
3689     if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3690         resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3691                           "__stop_fail__", scheduler);
3692 
3693     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3694         unpack_migrate_to_failure(history);
3695 
3696     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3697         unpack_migrate_from_failure(history);
3698 
3699     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3700         history->rsc->priv->orig_role = pcmk_role_promoted;
3701 
3702     } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3703         if (config_on_fail == pcmk__on_fail_block) {
3704             history->rsc->priv->orig_role = pcmk_role_promoted;
3705             pe__set_next_role(history->rsc, pcmk_role_stopped,
3706                               "demote with " PCMK_META_ON_FAIL "=block");
3707 
3708         } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3709             history->rsc->priv->orig_role = pcmk_role_stopped;
3710 
3711         } else {
3712             /* Staying in the promoted role would put the scheduler and
3713              * controller into a loop. Setting the role to unpromoted is not
3714              * dangerous because the resource will be stopped as part of
3715              * recovery, and any promotion will be ordered after that stop.
3716              */
3717             history->rsc->priv->orig_role = pcmk_role_unpromoted;
3718         }
3719     }
3720 
3721     if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3722         /* leave stopped */
3723         pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3724         history->rsc->priv->orig_role = pcmk_role_stopped;
3725 
3726     } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3727         pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3728         set_active(history->rsc);
3729     }
3730 
3731     pcmk__rsc_trace(history->rsc,
3732                     "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3733                     history->rsc->id,
3734                     pcmk_role_text(history->rsc->priv->orig_role),
3735                     pcmk__btoa(history->node->details->unclean),
3736                     pcmk__on_fail_text(config_on_fail),
3737                     pcmk_role_text(fail_role));
3738 
3739     if ((fail_role != pcmk_role_started)
3740         && (history->rsc->priv->next_role < fail_role)) {
3741         pe__set_next_role(history->rsc, fail_role, "failure");
3742     }
3743 
3744     if (fail_role == pcmk_role_stopped) {
3745         ban_from_all_nodes(history->rsc);
3746     }
3747 }
3748 
3749 /*!
3750  * \internal
3751  * \brief Block a resource with a failed action if it cannot be recovered
3752  *
3753  * If resource action is a failed stop and fencing is not possible, mark the
3754  * resource as unmanaged and blocked, since recovery cannot be done.
3755  *
3756  * \param[in,out] history  Parsed action history entry
3757  */
3758 static void
3759 block_if_unrecoverable(struct action_history *history)
3760 {
3761     char *last_change_s = NULL;
3762 
3763     if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3764         return; // All actions besides stop are always recoverable
3765     }
3766     if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3767         return; // Failed stops are recoverable via fencing
3768     }
3769 
3770     last_change_s = last_change_str(history->xml);
3771     pcmk__sched_err(history->node->priv->scheduler,
3772                     "No further recovery can be attempted for %s "
3773                     "because %s on %s failed (%s%s%s) at %s "
3774                     QB_XS " rc=%d id=%s",
3775                     history->rsc->id, history->task,
3776                     pcmk__node_name(history->node),
3777                     crm_exit_str(history->exit_status),
3778                     (pcmk__str_empty(history->exit_reason)? "" : ": "),
3779                     pcmk__s(history->exit_reason, ""),
3780                     last_change_s, history->exit_status, history->id);
3781 
3782     free(last_change_s);
3783 
3784     pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed);
3785     pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3786 }
3787 
3788 /*!
3789  * \internal
3790  * \brief Update action history's execution status and why
3791  *
3792  * \param[in,out] history  Parsed action history entry
3793  * \param[out]    why      Where to store reason for update
3794  * \param[in]     value    New value
3795  * \param[in]     reason   Description of why value was changed
3796  */
3797 static inline void
3798 remap_because(struct action_history *history, const char **why, int value,
3799               const char *reason)
3800 {
3801     if (history->execution_status != value) {
3802         history->execution_status = value;
3803         *why = reason;
3804     }
3805 }
3806 
3807 /*!
3808  * \internal
3809  * \brief Remap informational monitor results and operation status
3810  *
3811  * For monitor results, certain OCF codes provide extended information to the
3812  * user about services that are not failed but not entirely healthy either.
3813  * Pacemaker must treat these as the "normal" (successful) result.
3814  *
3815  * For operation status, the action result is used to determine an appropriate
3816  * status for responding to the action, because the status provided by the
3817  * executor is not directly usable (the executor does not know what was expected).
3818  *
3819  * \param[in,out] history  Parsed action history entry
3820  * \param[in,out] on_fail  What should be done about the result
3821  * \param[in]     expired  Whether result is expired
3822  *
3823  * \note If the result is remapped and the node is not shutting down or failed,
3824  *       the operation will be recorded in the scheduler data's list of failed
3825  *       operations to highlight it for the user.
3826  *
3827  * \note This may update the resource's current and next role.
3828  */
3829 static void
3830 remap_operation(struct action_history *history,
3831                 enum pcmk__on_fail *on_fail, bool expired)
3832 {
3833     /* @TODO It would probably also be a good idea to map an exit status of
3834      * CRM_EX_PROMOTED or CRM_EX_DEGRADED_PROMOTED to CRM_EX_OK for promote
3835      * actions
3836      */
3837 
3838     bool is_probe = false;
3839     int orig_exit_status = history->exit_status;
3840     int orig_exec_status = history->execution_status;
3841     const char *why = NULL;
3842     const char *task = history->task;
3843 
3844     // Remap degraded results to their successful counterparts
3845     history->exit_status = pcmk__effective_rc(history->exit_status);
3846     if (history->exit_status != orig_exit_status) {
3847         why = "degraded result";
3848         if (!expired && (!history->node->details->shutdown
3849                          || history->node->details->online)) {
3850             record_failed_op(history);
3851         }
3852     }
3853 
3854     if (!pcmk__is_bundled(history->rsc)
3855         && pcmk_xe_mask_probe_failure(history->xml)
3856         && ((history->execution_status != PCMK_EXEC_DONE)
3857             || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3858         history->execution_status = PCMK_EXEC_DONE;
3859         history->exit_status = PCMK_OCF_NOT_RUNNING;
3860         why = "equivalent probe result";
3861     }
3862 
3863     /* If the executor reported an execution status of anything but done or
3864      * error, consider that final. But for done or error, we know better whether
3865      * it should be treated as a failure or not, because we know the expected
3866      * result.
3867      */
3868     switch (history->execution_status) {
3869         case PCMK_EXEC_DONE:
3870         case PCMK_EXEC_ERROR:
3871             break;
3872 
3873         // These should be treated as node-fatal
3874         case PCMK_EXEC_NO_FENCE_DEVICE:
3875         case PCMK_EXEC_NO_SECRETS:
3876             remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3877                           "node-fatal error");
3878             goto remap_done;
3879 
3880         default:
3881             goto remap_done;
3882     }
3883 
3884     is_probe = pcmk_xe_is_probe(history->xml);
3885     if (is_probe) {
3886         task = "probe";
3887     }
3888 
3889     if (history->expected_exit_status < 0) {
3890         /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3891          * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3892          * expected exit status in the transition key, which (along with the
3893          * similar case of a corrupted transition key in the CIB) will be
3894          * reported to this function as -1. Pacemaker 2.0+ does not support
3895          * rolling upgrades from those versions or processing of saved CIB files
3896          * from those versions, so we do not need to care much about this case.
3897          */
3898         remap_because(history, &why, PCMK_EXEC_ERROR,
3899                       "obsolete history format");
3900         pcmk__config_warn("Expected result not found for %s on %s "
3901                           "(corrupt or obsolete CIB?)",
3902                           history->key, pcmk__node_name(history->node));
3903 
3904     } else if (history->exit_status == history->expected_exit_status) {
3905         remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3906 
3907     } else {
3908         remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3909         pcmk__rsc_debug(history->rsc,
3910                         "%s on %s: expected %d (%s), got %d (%s%s%s)",
3911                         history->key, pcmk__node_name(history->node),
3912                         history->expected_exit_status,
3913                         crm_exit_str(history->expected_exit_status),
3914                         history->exit_status,
3915                         crm_exit_str(history->exit_status),
3916                         (pcmk__str_empty(history->exit_reason)? "" : ": "),
3917                         pcmk__s(history->exit_reason, ""));
3918     }
3919 
3920     switch (history->exit_status) {
3921         case PCMK_OCF_OK:
3922             if (is_probe
3923                 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3924                 char *last_change_s = last_change_str(history->xml);
3925 
3926                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3927                 pcmk__rsc_info(history->rsc,
3928                                "Probe found %s active on %s at %s",
3929                                history->rsc->id, pcmk__node_name(history->node),
3930                                last_change_s);
3931                 free(last_change_s);
3932             }
3933             break;
3934 
3935         case PCMK_OCF_NOT_RUNNING:
3936             if (is_probe
3937                 || (history->expected_exit_status == history->exit_status)
3938                 || !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) {
3939 
3940                 /* For probes, recurring monitors for the Stopped role, and
3941                  * unmanaged resources, "not running" is not considered a
3942                  * failure.
3943                  */
3944                 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3945                 history->rsc->priv->orig_role = pcmk_role_stopped;
3946                 *on_fail = pcmk__on_fail_ignore;
3947                 pe__set_next_role(history->rsc, pcmk_role_unknown,
3948                                   "not running");
3949             }
3950             break;
3951 
3952         case PCMK_OCF_RUNNING_PROMOTED:
3953             if (is_probe
3954                 && (history->exit_status != history->expected_exit_status)) {
3955                 char *last_change_s = last_change_str(history->xml);
3956 
3957                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3958                 pcmk__rsc_info(history->rsc,
3959                                "Probe found %s active and promoted on %s at %s",
3960                                 history->rsc->id,
3961                                 pcmk__node_name(history->node), last_change_s);
3962                 free(last_change_s);
3963             }
3964             if (!expired
3965                 || (history->exit_status == history->expected_exit_status)) {
3966                 history->rsc->priv->orig_role = pcmk_role_promoted;
3967             }
3968             break;
3969 
3970         case PCMK_OCF_FAILED_PROMOTED:
3971             if (!expired) {
3972                 history->rsc->priv->orig_role = pcmk_role_promoted;
3973             }
3974             remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
3975             break;
3976 
3977         case PCMK_OCF_NOT_CONFIGURED:
3978             remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
3979             break;
3980 
3981         case PCMK_OCF_UNIMPLEMENT_FEATURE:
3982             {
3983                 guint interval_ms = 0;
3984                 crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
3985                                      &interval_ms);
3986 
3987                 if (interval_ms == 0) {
3988                     if (!expired) {
3989                         block_if_unrecoverable(history);
3990                     }
3991                     remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3992                                   "exit status");
3993                 } else {
3994                     remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
3995                                   "exit status");
3996                 }
3997             }
3998             break;
3999 
4000         case PCMK_OCF_NOT_INSTALLED:
4001         case PCMK_OCF_INVALID_PARAM:
4002         case PCMK_OCF_INSUFFICIENT_PRIV:
4003             if (!expired) {
4004                 block_if_unrecoverable(history);
4005             }
4006             remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4007             break;
4008 
4009         default:
4010             if (history->execution_status == PCMK_EXEC_DONE) {
4011                 char *last_change_s = last_change_str(history->xml);
4012 
4013                 crm_info("Treating unknown exit status %d from %s of %s "
4014                          "on %s at %s as failure",
4015                          history->exit_status, task, history->rsc->id,
4016                          pcmk__node_name(history->node), last_change_s);
4017                 remap_because(history, &why, PCMK_EXEC_ERROR,
4018                               "unknown exit status");
4019                 free(last_change_s);
4020             }
4021             break;
4022     }
4023 
4024 remap_done:
4025     if (why != NULL) {
4026         pcmk__rsc_trace(history->rsc,
4027                         "Remapped %s result from [%s: %s] to [%s: %s] "
4028                         "because of %s",
4029                         history->key, pcmk_exec_status_str(orig_exec_status),
4030                         crm_exit_str(orig_exit_status),
4031                         pcmk_exec_status_str(history->execution_status),
4032                         crm_exit_str(history->exit_status), why);
4033     }
4034 }
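
/* A minimal illustrative sketch (not compiled), showing how a degraded
 * recurring-monitor result would flow through remap_operation(). The history
 * entry is assumed to be fully populated, as unpack_rsc_op() below does, and
 * the concrete values are hypothetical.
 */
#if 0
static void
remap_degraded_example(struct action_history *history,
                       enum pcmk__on_fail *on_fail)
{
    /* Suppose a recurring monitor reported PCMK_OCF_DEGRADED where PCMK_OCF_OK
     * was expected, and the executor recorded it as an error
     */
    history->exit_status = PCMK_OCF_DEGRADED;
    history->expected_exit_status = PCMK_OCF_OK;
    history->execution_status = PCMK_EXEC_ERROR;

    remap_operation(history, on_fail, false);

    /* pcmk__effective_rc() maps the degraded code to its successful
     * counterpart, which then matches the expected result, so now:
     *   history->exit_status == PCMK_OCF_OK
     *   history->execution_status == PCMK_EXEC_DONE
     * The original degraded result is still recorded as a failed operation so
     * that it remains visible to the user.
     */
}
#endif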
4035 
4036 // Return TRUE for a start or monitor last failure with changed parameters
4037 static bool
4038 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4039                               pcmk_resource_t *rsc, pcmk_node_t *node)
4040 {
4041     if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4042         if (pe__bundle_needs_remote_name(rsc)) {
4043             /* We haven't allocated resources yet, so we can't reliably
4044              * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4045              * When that's needed, defer the check until later.
4046              */
4047             pcmk__add_param_check(xml_op, rsc, node, pcmk__check_last_failure);
4048 
4049         } else {
4050             pcmk__op_digest_t *digest_data = NULL;
4051 
4052             digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4053                                                 rsc->priv->scheduler);
4054             switch (digest_data->rc) {
4055                 case pcmk__digest_unknown:
4056                     crm_trace("Resource %s history entry %s on %s"
4057                               " has no digest to compare",
4058                               rsc->id, pcmk__xe_history_key(xml_op),
4059                               node->priv->id);
4060                     break;
4061                 case pcmk__digest_match:
4062                     break;
4063                 default:
4064                     return TRUE;
4065             }
4066         }
4067     }
4068     return FALSE;
4069 }
4070 
4071 // Order action after fencing of remote node, given connection rsc
4072 static void
4073 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4074                            pcmk_scheduler_t *scheduler)
4075 {
4076     pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4077 
4078     if (remote_node) {
4079         pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4080                                            FALSE, scheduler);
4081 
4082         order_actions(fence, action, pcmk__ar_first_implies_then);
4083     }
4084 }
4085 
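/*!
 * \internal
 * \brief Check whether a failure timeout should be ignored for a history entry
 *
 * \param[in] rsc              Resource that the failed action history is for
 * \param[in] task             Name of the action that failed
 * \param[in] interval_ms      Interval of the action that failed
 * \param[in] is_last_failure  Whether the entry is a last_failure entry
 *
 * \return true if the failure timeout should be ignored, otherwise false
 */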
4086 static bool
4087 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4088                               guint interval_ms, bool is_last_failure)
4089 {
4090     /* Clearing failures of recurring monitors has special concerns. The
4091      * executor reports only changes in the monitor result, so if the
4092      * monitor is still active and still getting the same failure result,
4093      * that will go undetected after the failure is cleared.
4094      *
4095      * Also, the operation history will have the time when the recurring
4096      * monitor result changed to the given code, not the time when the
4097      * result last happened.
4098      *
4099      * @TODO We probably should clear such failures only when the failure
4100      * timeout has passed since the last occurrence of the failed result.
4101      * However we don't record that information. We could maybe approximate
4102      * that by clearing only if there is a more recent successful monitor or
4103      * stop result, but we don't even have that information at this point
4104      * since we are still unpacking the resource's operation history.
4105      *
4106      * This is especially important for remote connection resources with a
4107      * reconnect interval, so in that case, we skip clearing failures
4108      * if the remote node hasn't been fenced.
4109      */
4110     if ((rsc->priv->remote_reconnect_ms > 0U)
4111         && pcmk_is_set(rsc->priv->scheduler->flags,
4112                        pcmk__sched_fencing_enabled)
4113         && (interval_ms != 0)
4114         && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4115 
4116         pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4117                                                   rsc->id);
4118 
4119         if (remote_node && !pcmk_is_set(remote_node->priv->flags,
4120                                         pcmk__node_remote_fenced)) {
4121             if (is_last_failure) {
4122                 crm_info("Waiting to clear monitor failure for remote node %s"
4123                          " until fencing has occurred", rsc->id);
4124             }
4125             return TRUE;
4126         }
4127     }
4128     return FALSE;
4129 }
4130 
4131 /*!
4132  * \internal
4133  * \brief Check operation age and schedule failure clearing when appropriate
4134  *
4135  * This function has two distinct purposes. The first is to check whether an
4136  * operation history entry is expired (i.e. the resource has a failure timeout,
4137  * the entry is older than the timeout, and the resource either has no fail
4138  * count or its fail count is entirely older than the timeout). The second is to
4139  * schedule fail count clearing when appropriate (i.e. the operation is expired
4140  * and either the resource has an expired fail count or the operation is a
4141  * last_failure for a remote connection resource with a reconnect interval,
4142  * or the operation is a last_failure for a start or monitor operation and the
4143  * resource's parameters have changed since the operation).
4144  *
4145  * \param[in,out] history  Parsed action result history
4146  *
4147  * \return true if operation history entry is expired, otherwise false
4148  */
4149 static bool
4150 check_operation_expiry(struct action_history *history)
4151 {
4152     bool expired = false;
4153     bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4154     time_t last_run = 0;
4155     int unexpired_fail_count = 0;
4156     const char *clear_reason = NULL;
4157     const guint expiration_sec =
4158         pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4159     pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4160 
4161     if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4162         pcmk__rsc_trace(history->rsc,
4163                         "Resource history entry %s on %s is not expired: "
4164                         "Not Installed does not expire",
4165                         history->id, pcmk__node_name(history->node));
4166         return false; // "Not installed" must always be cleared manually
4167     }
4168 
4169     if ((expiration_sec > 0)
4170         && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
4171                                     &last_run) == 0)) {
4172 
4173         /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4174          * timestamp
4175          */
4176 
4177         time_t now = pcmk__scheduler_epoch_time(scheduler);
4178         time_t last_failure = 0;
4179 
4180         // Is this particular operation history older than the failure timeout?
4181         if ((now >= (last_run + expiration_sec))
4182             && !should_ignore_failure_timeout(history->rsc, history->task,
4183                                               history->interval_ms,
4184                                               is_last_failure)) {
4185             expired = true;
4186         }
4187 
4188         // Does the resource as a whole have an unexpired fail count?
4189         unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4190                                                 &last_failure,
4191                                                 pcmk__fc_effective,
4192                                                 history->xml);
4193 
4194         // Update scheduler recheck time according to *last* failure
4195         crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4196                   "expiration=%s last-failure@%lld",
4197                   history->id, (long long) last_run, (expired? "" : "not "),
4198                   (long long) now, unexpired_fail_count,
4199                   pcmk__readable_interval(expiration_sec * 1000),
4200                   (long long) last_failure);
4201         last_failure += expiration_sec + 1;
4202         if (unexpired_fail_count && (now < last_failure)) {
4203             pcmk__update_recheck_time(last_failure, scheduler,
4204                                       "fail count expiration");
4205         }
4206     }
4207 
4208     if (expired) {
4209         if (pe_get_failcount(history->node, history->rsc, NULL,
4210                              pcmk__fc_default, history->xml)) {
4211             // There is a fail count ignoring timeout
4212 
4213             if (unexpired_fail_count == 0) {
4214                 // There is no fail count considering timeout
4215                 clear_reason = "it expired";
4216 
4217             } else {
4218                 /* This operation is old, but there is an unexpired fail count.
4219                  * In a properly functioning cluster, this should only be
4220                  * possible if this operation is not a failure (otherwise the
4221                  * fail count should be expired too), so this is really just a
4222                  * failsafe.
4223                  */
4224                 pcmk__rsc_trace(history->rsc,
4225                                 "Resource history entry %s on %s is not "
4226                                 "expired: Unexpired fail count",
4227                                 history->id, pcmk__node_name(history->node));
4228                 expired = false;
4229             }
4230 
4231         } else if (is_last_failure
4232                    && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4233             /* Clear any expired last failure when reconnect interval is set,
4234              * even if there is no fail count.
4235              */
4236             clear_reason = "reconnect interval is set";
4237         }
4238     }
4239 
4240     if (!expired && is_last_failure
4241         && should_clear_for_param_change(history->xml, history->task,
4242                                          history->rsc, history->node)) {
4243         clear_reason = "resource parameters have changed";
4244     }
4245 
4246     if (clear_reason != NULL) {
4247         pcmk_action_t *clear_op = NULL;
4248 
4249         // Schedule clearing of the fail count
4250         clear_op = pe__clear_failcount(history->rsc, history->node,
4251                                        clear_reason, scheduler);
4252 
4253         if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4254             && (history->rsc->priv->remote_reconnect_ms > 0)) {
4255             /* If we're clearing a remote connection due to a reconnect
4256              * interval, we want to wait until any scheduled fencing
4257              * completes.
4258              *
4259              * We could limit this to remote_node->details->unclean, but at
4260              * this point, that's always true (it won't be reliable until
4261              * after unpack_node_history() is done).
4262              */
4263             crm_info("Clearing %s failure will wait until any scheduled "
4264                      "fencing of %s completes",
4265                      history->task, history->rsc->id);
4266             order_after_remote_fencing(clear_op, history->rsc, scheduler);
4267         }
4268     }
4269 
4270     if (expired && (history->interval_ms == 0)
4271         && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4272         switch (history->exit_status) {
4273             case PCMK_OCF_OK:
4274             case PCMK_OCF_NOT_RUNNING:
4275             case PCMK_OCF_RUNNING_PROMOTED:
4276             case PCMK_OCF_DEGRADED:
4277             case PCMK_OCF_DEGRADED_PROMOTED:
4278                 // Don't expire probes that return these values
4279                 pcmk__rsc_trace(history->rsc,
4280                                 "Resource history entry %s on %s is not "
4281                                 "expired: Probe result",
4282                                 history->id, pcmk__node_name(history->node));
4283                 expired = false;
4284                 break;
4285         }
4286     }
4287 
4288     return expired;
4289 }
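
/* Worked example with hypothetical values: if PCMK_META_FAILURE_TIMEOUT is
 * 10 minutes and a failed monitor's PCMK_XA_LAST_RC_CHANGE was recorded at
 * 12:00, the entry can be considered expired from 12:10 on, provided the
 * resource's effective fail count has also aged out. If an unexpired fail
 * count remains, the scheduler recheck time is set to just after the most
 * recent failure expires, so that a new transition re-evaluates it then.
 */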
4290 
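/*!
 * \internal
 * \brief Parse the expected exit status from an action history entry
 *
 * \param[in] xml_op  Action history entry XML
 *
 * \return Exit status that the action was expected to return, or -1 if the
 *         entry's transition key is missing or corrupt
 */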
4291 int
4292 pe__target_rc_from_xml(const xmlNode *xml_op)
4293 {
4294     int target_rc = 0;
4295     const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4296 
4297     if (key == NULL) {
4298         return -1;
4299     }
4300     decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4301     return target_rc;
4302 }
4303 
4304 /*!
4305  * \internal
4306  * \brief Update a resource's state for an action result
4307  *
4308  * \param[in,out] history       Parsed action history entry
4309  * \param[in]     exit_status   Exit status to base new state on
4310  * \param[in]     last_failure  Resource's last_failure entry, if known
4311  * \param[in,out] on_fail       Resource's current failure handling
4312  */
4313 static void
4314 update_resource_state(struct action_history *history, int exit_status,
4315                       const xmlNode *last_failure,
4316                       enum pcmk__on_fail *on_fail)
4317 {
4318     bool clear_past_failure = false;
4319 
4320     if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4321         || (!pcmk__is_bundled(history->rsc)
4322             && pcmk_xe_mask_probe_failure(history->xml))) {
4323         history->rsc->priv->orig_role = pcmk_role_stopped;
4324 
4325     } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4326         clear_past_failure = true;
4327 
4328     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4329                             pcmk__str_none)) {
4330         if ((last_failure != NULL)
4331             && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4332                             pcmk__str_none)) {
4333             clear_past_failure = true;
4334         }
4335         if (history->rsc->priv->orig_role < pcmk_role_started) {
4336             set_active(history->rsc);
4337         }
4338 
4339     } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4340         history->rsc->priv->orig_role = pcmk_role_started;
4341         clear_past_failure = true;
4342 
4343     } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4344         history->rsc->priv->orig_role = pcmk_role_stopped;
4345         clear_past_failure = true;
4346 
4347     } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4348                             pcmk__str_none)) {
4349         history->rsc->priv->orig_role = pcmk_role_promoted;
4350         clear_past_failure = true;
4351 
4352     } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4353                             pcmk__str_none)) {
4354         if (*on_fail == pcmk__on_fail_demote) {
4355             /* Demote clears an error only if
4356              * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4357              */
4358             clear_past_failure = true;
4359         }
4360         history->rsc->priv->orig_role = pcmk_role_unpromoted;
4361 
4362     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4363                             pcmk__str_none)) {
4364         history->rsc->priv->orig_role = pcmk_role_started;
4365         clear_past_failure = true;
4366 
4367     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4368                             pcmk__str_none)) {
4369         unpack_migrate_to_success(history);
4370 
4371     } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4372         pcmk__rsc_trace(history->rsc, "%s active on %s",
4373                         history->rsc->id, pcmk__node_name(history->node));
4374         set_active(history->rsc);
4375     }
4376 
4377     if (!clear_past_failure) {
4378         return;
4379     }
4380 
4381     switch (*on_fail) {
4382         case pcmk__on_fail_stop:
4383         case pcmk__on_fail_ban:
4384         case pcmk__on_fail_standby_node:
4385         case pcmk__on_fail_fence_node:
4386             pcmk__rsc_trace(history->rsc,
4387                             "%s (%s) is not cleared by a completed %s",
4388                             history->rsc->id, pcmk__on_fail_text(*on_fail),
4389                             history->task);
4390             break;
4391 
4392         case pcmk__on_fail_block:
4393         case pcmk__on_fail_ignore:
4394         case pcmk__on_fail_demote:
4395         case pcmk__on_fail_restart:
4396         case pcmk__on_fail_restart_container:
4397             *on_fail = pcmk__on_fail_ignore;
4398             pe__set_next_role(history->rsc, pcmk_role_unknown,
4399                               "clear past failures");
4400             break;
4401 
4402         case pcmk__on_fail_reset_remote:
4403             if (history->rsc->priv->remote_reconnect_ms == 0U) {
4404                 /* With no reconnect interval, the connection is allowed to
4405                  * start again after the remote node is fenced and
4406                  * completely stopped. (With a reconnect interval, we wait
4407                  * for the failure to be cleared entirely before attempting
4408                  * to reconnect.)
4409                  */
4410                 *on_fail = pcmk__on_fail_ignore;
4411                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4412                                   "clear past failures and reset remote");
4413             }
4414             break;
4415     }
4416 }
4417 
4418 /*!
4419  * \internal
4420  * \brief Check whether a given history entry matters for resource state
4421  *
4422  * \param[in] history  Parsed action history entry
4423  *
4424  * \return true if action can affect resource state, otherwise false
4425  */
4426 static inline bool
4427 can_affect_state(struct action_history *history)
4428 {
4429      return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4430                              PCMK_ACTION_START, PCMK_ACTION_STOP,
4431                              PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4432                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4433                              "asyncmon", NULL);
4434 }
4435 
4436 /*!
4437  * \internal
4438  * \brief Unpack execution/exit status and exit reason from a history entry
4439  *
4440  * \param[in,out] history  Action history entry to unpack
4441  *
4442  * \return Standard Pacemaker return code
4443  */
4444 static int
4445 unpack_action_result(struct action_history *history)
4446 {
4447     if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4448                                &(history->execution_status)) < 0)
4449         || (history->execution_status < PCMK_EXEC_PENDING)
4450         || (history->execution_status > PCMK_EXEC_MAX)
4451         || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4452         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4453                          "with invalid " PCMK__XA_OP_STATUS " '%s'",
4454                          history->id, history->rsc->id,
4455                          pcmk__node_name(history->node),
4456                          pcmk__s(crm_element_value(history->xml,
4457                                                    PCMK__XA_OP_STATUS),
4458                                  ""));
4459         return pcmk_rc_unpack_error;
4460     }
4461     if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4462                                &(history->exit_status)) < 0)
4463         || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4464         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4465                          "with invalid " PCMK__XA_RC_CODE " '%s'",
4466                          history->id, history->rsc->id,
4467                          pcmk__node_name(history->node),
4468                          pcmk__s(crm_element_value(history->xml,
4469                                                    PCMK__XA_RC_CODE),
4470                                  ""));
4471         return pcmk_rc_unpack_error;
4472     }
4473     history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4474     return pcmk_rc_ok;
4475 }
4476 
4477 /*!
4478  * \internal
4479  * \brief Process an action history entry whose result expired
4480  *
4481  * \param[in,out] history           Parsed action history entry
4482  * \param[in]     orig_exit_status  Action exit status before remapping
4483  *
4484  * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4485  *         entry needs no further processing)
4486  */
4487 static int
4488 process_expired_result(struct action_history *history, int orig_exit_status)
4489 {
4490     if (!pcmk__is_bundled(history->rsc)
4491         && pcmk_xe_mask_probe_failure(history->xml)
4492         && (orig_exit_status != history->expected_exit_status)) {
4493 
4494         if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4495             history->rsc->priv->orig_role = pcmk_role_unknown;
4496         }
4497         crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4498                   "Masked failure expired",
4499                   history->id, history->rsc->id,
4500                   pcmk__node_name(history->node));
4501         return pcmk_rc_ok;
4502     }
4503 
4504     if (history->exit_status == history->expected_exit_status) {
4505         return pcmk_rc_undetermined; // Only failures expire
4506     }
4507 
4508     if (history->interval_ms == 0) {
4509         crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4510                    "Expired failure",
4511                    history->id, history->task, history->rsc->id,
4512                    pcmk__node_name(history->node));
4513         return pcmk_rc_ok;
4514     }
4515 
4516     if (history->node->details->online && !history->node->details->unclean) {
4517         /* Reschedule the recurring action. schedule_cancel() won't work at
4518          * this stage, so as a hacky workaround, forcibly change the restart
4519          * digest so pcmk__check_action_config() does what we want later.
4520          *
4521          * @TODO We should skip this if there is a newer successful monitor.
4522          *       Also, this causes rescheduling only if the history entry
4523          *       has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4524          *       scheduler regression test doesn't, but that may not be a
4525          *       realistic scenario in production).
4526          */
4527         crm_notice("Rescheduling %s-interval %s of %s on %s "
4528                    "after failure expired",
4529                    pcmk__readable_interval(history->interval_ms), history->task,
4530                    history->rsc->id, pcmk__node_name(history->node));
4531         crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4532                     "calculated-failure-timeout");
4533         return pcmk_rc_ok;
4534     }
4535 
4536     return pcmk_rc_undetermined;
4537 }
4538 
4539 /*!
4540  * \internal
4541  * \brief Process a masked probe failure
4542  *
4543  * \param[in,out] history           Parsed action history entry
4544  * \param[in]     orig_exit_status  Action exit status before remapping
4545  * \param[in]     last_failure      Resource's last_failure entry, if known
4546  * \param[in,out] on_fail           Resource's current failure handling
4547  */
4548 static void
4549 mask_probe_failure(struct action_history *history, int orig_exit_status,
4550                    const xmlNode *last_failure,
4551                    enum pcmk__on_fail *on_fail)
4552 {
4553     pcmk_resource_t *ban_rsc = history->rsc;
4554 
4555     if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) {
4556         ban_rsc = uber_parent(history->rsc);
4557     }
4558 
4559     crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4560                crm_exit_str(orig_exit_status), history->rsc->id,
4561                pcmk__node_name(history->node));
4562     update_resource_state(history, history->expected_exit_status, last_failure,
4563                           on_fail);
4564     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4565 
4566     record_failed_op(history);
4567     resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4568                       "masked-probe-failure", ban_rsc->priv->scheduler);
4569 }
4570 
4571 /*!
4572  * \internal
 * \brief Check whether a given failure is for a given pending action
4573  *
4574  * \param[in] history       Parsed history entry for pending action
4575  * \param[in] last_failure  Resource's last_failure entry, if known
4576  *
4577  * \return true if \p last_failure is failure of pending action in \p history,
4578  *         otherwise false
4579  * \note Both \p history and \p last_failure must come from the same
4580  *       \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4581  *       the same.
4582  */
4583 static bool
4584 failure_is_newer(const struct action_history *history,
4585                  const xmlNode *last_failure)
4586 {
4587     guint failure_interval_ms = 0U;
4588     long long failure_change = 0LL;
4589     long long this_change = 0LL;
4590 
4591     if (last_failure == NULL) {
4592         return false; // Resource has no last_failure entry
4593     }
4594 
4595     if (!pcmk__str_eq(history->task,
4596                       crm_element_value(last_failure, PCMK_XA_OPERATION),
4597                       pcmk__str_none)) {
4598         return false; // last_failure is for different action
4599     }
4600 
4601     if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4602                               &failure_interval_ms) != pcmk_ok)
4603         || (history->interval_ms != failure_interval_ms)) {
4604         return false; // last_failure is for action with different interval
4605     }
4606 
4607     if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
4608                        &this_change, 0LL) != pcmk_rc_ok)
4609         || (pcmk__scan_ll(crm_element_value(last_failure,
4610                                             PCMK_XA_LAST_RC_CHANGE),
4611                           &failure_change, 0LL) != pcmk_rc_ok)
4612         || (failure_change < this_change)) {
4613         return false; // Failure is not known to be newer
4614     }
4615 
4616     return true;
4617 }
4618 
4619 /*!
4620  * \internal
4621  * \brief Update a resource's role etc. for a pending action
4622  *
4623  * \param[in,out] history       Parsed history entry for pending action
4624  * \param[in]     last_failure  Resource's last_failure entry, if known
4625  */
4626 static void
4627 process_pending_action(struct action_history *history,
4628                        const xmlNode *last_failure)
4629 {
4630     /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4631      * and there might be a RSC_monitor_INTERVAL entry with the last successful
4632      * or pending result.
4633      *
4634      * If last_failure contains the failure of the pending recurring monitor
4635      * we're processing here, and is newer, the action is no longer pending.
4636      * (Pending results have call ID -1, which sorts last, so the last failure
4637      * if any should be known.)
4638      */
4639     if (failure_is_newer(history, last_failure)) {
4640         return;
4641     }
4642 
4643     if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4644         pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4645         set_active(history->rsc);
4646 
4647     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4648         history->rsc->priv->orig_role = pcmk_role_promoted;
4649 
4650     } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4651                && history->node->details->unclean) {
4652         /* A migrate_to action is pending on an unclean source, so force a stop
4653          * on the target.
4654          */
4655         const char *migrate_target = NULL;
4656         pcmk_node_t *target = NULL;
4657 
4658         migrate_target = crm_element_value(history->xml,
4659                                            PCMK__META_MIGRATE_TARGET);
4660         target = pcmk_find_node(history->rsc->priv->scheduler,
4661                                 migrate_target);
4662         if (target != NULL) {
4663             stop_action(history->rsc, target, FALSE);
4664         }
4665     }
4666 
4667     if (history->rsc->priv->pending_action != NULL) {
4668         /* There should never be multiple pending actions, but as a failsafe,
4669          * just remember the first one processed for display purposes.
4670          */
4671         return;
4672     }
4673 
4674     if (pcmk_is_probe(history->task, history->interval_ms)) {
4675         /* Pending probes are currently never displayed, even if pending
4676          * operations are requested. If we ever want to change that,
4677          * enable the below and the corresponding part of
4678          * native.c:native_pending_action().
4679          */
4680 #if 0
4681         history->rsc->priv->pending_action = strdup("probe");
4682         history->rsc->priv->pending_node = history->node;
4683 #endif
4684     } else {
4685         history->rsc->priv->pending_action = strdup(history->task);
4686         history->rsc->priv->pending_node = history->node;
4687     }
4688 }
4689 
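/*!
 * \internal
 * \brief Unpack an action history entry and update resource state accordingly
 *
 * \param[in,out] rsc           Resource that the history entry is for
 * \param[in,out] node          Node that the history entry is for
 * \param[in,out] xml_op        History entry XML to unpack
 * \param[in,out] last_failure  Where to track the resource's last_failure entry
 * \param[in,out] on_fail       Resource's current failure handling
 */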
4690 static void
4691 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4692               xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4693 {
4694     int old_rc = 0;
4695     bool expired = false;
4696     pcmk_resource_t *parent = rsc;
4697     enum rsc_role_e fail_role = pcmk_role_unknown;
4698     enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4699 
4700     struct action_history history = {
4701         .rsc = rsc,
4702         .node = node,
4703         .xml = xml_op,
4704         .execution_status = PCMK_EXEC_UNKNOWN,
4705     };
4706 
4707     CRM_CHECK(rsc && node && xml_op, return);
4708 
4709     history.id = pcmk__xe_id(xml_op);
4710     if (history.id == NULL) {
4711         pcmk__config_err("Ignoring resource history entry for %s on %s "
4712                          "without ID", rsc->id, pcmk__node_name(node));
4713         return;
4714     }
4715 
4716     // Task and interval
4717     history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4718     if (history.task == NULL) {
4719         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4720                          "without " PCMK_XA_OPERATION,
4721                          history.id, rsc->id, pcmk__node_name(node));
4722         return;
4723     }
4724     crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4725     if (!can_affect_state(&history)) {
4726         pcmk__rsc_trace(rsc,
4727                         "Ignoring resource history entry %s for %s on %s "
4728                         "with irrelevant action '%s'",
4729                         history.id, rsc->id, pcmk__node_name(node),
4730                         history.task);
4731         return;
4732     }
4733 
4734     if (unpack_action_result(&history) != pcmk_rc_ok) {
4735         return; // Error already logged
4736     }
4737 
4738     history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4739     history.key = pcmk__xe_history_key(xml_op);
4740     crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4741 
4742     pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4743                     history.id, history.task, history.call_id,
4744                     pcmk__node_name(node),
4745                     pcmk_exec_status_str(history.execution_status),
4746                     crm_exit_str(history.exit_status));
4747 
4748     if (node->details->unclean) {
4749         pcmk__rsc_trace(rsc,
4750                         "%s is running on %s, which is unclean (further action "
4751                         "depends on value of stop's on-fail attribute)",
4752                         rsc->id, pcmk__node_name(node));
4753     }
4754 
4755     expired = check_operation_expiry(&history);
4756     old_rc = history.exit_status;
4757 
4758     remap_operation(&history, on_fail, expired);
4759 
4760     if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4761         goto done;
4762     }
4763 
4764     if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4765         mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4766         goto done;
4767     }
4768 
4769     if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
4770         parent = uber_parent(rsc);
4771     }
4772 
4773     switch (history.execution_status) {
4774         case PCMK_EXEC_PENDING:
4775             process_pending_action(&history, *last_failure);
4776             goto done;
4777 
4778         case PCMK_EXEC_DONE:
4779             update_resource_state(&history, history.exit_status, *last_failure,
4780                                   on_fail);
4781             goto done;
4782 
4783         case PCMK_EXEC_NOT_INSTALLED:
4784             unpack_failure_handling(&history, &failure_strategy, &fail_role);
4785             if (failure_strategy == pcmk__on_fail_ignore) {
4786                 crm_warn("Cannot ignore failed %s of %s on %s: "
4787                          "Resource agent doesn't exist "
4788                          QB_XS " status=%d rc=%d id=%s",
4789                          history.task, rsc->id, pcmk__node_name(node),
4790                          history.execution_status, history.exit_status,
4791                          history.id);
4792                 /* This also causes the resource to be marked as
4793                  * pcmk__rsc_failed later, so it is displayed as "FAILED"
4794                  */
4795                 *on_fail = pcmk__on_fail_ban;
4796             }
4797             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4798                               "hard-error", rsc->priv->scheduler);
4799             unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4800                                   last_failure, on_fail);
4801             goto done;
4802 
4803         case PCMK_EXEC_NOT_CONNECTED:
4804             if (pcmk__is_pacemaker_remote_node(node)
4805                 && pcmk_is_set(node->priv->remote->flags,
4806                                pcmk__rsc_managed)) {
4807                 /* We should never get into a situation where a managed remote
4808                  * connection resource is considered OK but a resource action
4809                  * behind the connection gets a "not connected" status. But as a
4810                  * fail-safe in case a bug or unusual circumstances do lead to
4811                  * that, ensure the remote connection is considered failed.
4812                  */
4813                 pcmk__set_rsc_flags(node->priv->remote,
4814                                     pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
4815             }
4816             break; // Not done, do error handling
4817 
4818         case PCMK_EXEC_ERROR:
4819         case PCMK_EXEC_ERROR_HARD:
4820         case PCMK_EXEC_ERROR_FATAL:
4821         case PCMK_EXEC_TIMEOUT:
4822         case PCMK_EXEC_NOT_SUPPORTED:
4823         case PCMK_EXEC_INVALID:
4824             break; // Not done, do error handling
4825 
4826         default: // No other value should be possible at this point
4827             break;
4828     }
4829 
4830     unpack_failure_handling(&history, &failure_strategy, &fail_role);
4831     if ((failure_strategy == pcmk__on_fail_ignore)
4832         || ((failure_strategy == pcmk__on_fail_restart_container)
4833             && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4834 
4835         char *last_change_s = last_change_str(xml_op);
4836 
4837         crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4838                  QB_XS " %s",
4839                  history.task, crm_exit_str(history.exit_status),
4840                  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4841                  pcmk__s(history.exit_reason, ""), rsc->id,
4842                  pcmk__node_name(node), last_change_s, history.id);
4843         free(last_change_s);
4844 
4845         update_resource_state(&history, history.expected_exit_status,
4846                               *last_failure, on_fail);
4847         crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name);
4848         pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);
4849 
4850         record_failed_op(&history);
4851 
4852         if ((failure_strategy == pcmk__on_fail_restart_container)
4853             && (*on_fail <= pcmk__on_fail_restart)) {
4854             *on_fail = failure_strategy;
4855         }
4856 
4857     } else {
4858         unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4859                               last_failure, on_fail);
4860 
4861         if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4862             uint8_t log_level = LOG_ERR;
4863 
4864             if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4865                 log_level = LOG_NOTICE;
4866             }
4867             do_crm_log(log_level,
4868                        "Preventing %s from restarting on %s because "
4869                        "of hard failure (%s%s%s) " QB_XS " %s",
4870                        parent->id, pcmk__node_name(node),
4871                        crm_exit_str(history.exit_status),
4872                        (pcmk__str_empty(history.exit_reason)? "" : ": "),
4873                        pcmk__s(history.exit_reason, ""), history.id);
4874             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4875                               "hard-error", rsc->priv->scheduler);
4876 
4877         } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4878             pcmk__sched_err(rsc->priv->scheduler,
4879                             "Preventing %s from restarting anywhere because "
4880                             "of fatal failure (%s%s%s) " QB_XS " %s",
4881                             parent->id, crm_exit_str(history.exit_status),
4882                             (pcmk__str_empty(history.exit_reason)? "" : ": "),
4883                             pcmk__s(history.exit_reason, ""), history.id);
4884             resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
4885                               "fatal-error", rsc->priv->scheduler);
4886         }
4887     }
4888 
4889 done:
4890     pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4891                     rsc->id, pcmk__node_name(node), history.id,
4892                     pcmk_role_text(rsc->priv->orig_role),
4893                     pcmk_role_text(rsc->priv->next_role));
4894 }
4895 
4896 /*!
4897  * \internal
4898  * \brief Insert a node attribute with value into a \c GHashTable
4899  *
4900  * \param[in,out] key        Key to insert (either freed or owned by
4901  *                           \p user_data upon return)
4902  * \param[in]     value      Value to insert (owned by \p user_data upon return)
4903  * \param[in]     user_data  \c GHashTable to insert into
 *
 * \return \c TRUE (to tell \c g_hash_table_foreach_steal() to remove the
 *         entry from its source table)
4904  */
4905 static gboolean
4906 insert_attr(gpointer key, gpointer value, gpointer user_data)
4907 {
4908     GHashTable *table = user_data;
4909 
4910     g_hash_table_insert(table, key, value);
4911     return TRUE;
4912 }
4913 
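/*!
 * \internal
 * \brief Unpack a node's instance attributes and utilization
 *
 * \param[in]     xml_obj    Node's XML from the CIB
 * \param[in,out] node       Node whose attributes should be set
 * \param[in]     overwrite  Whether unpacked values may replace existing ones
 * \param[in,out] scheduler  Scheduler data
 */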
4914 static void
4915 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4916                pcmk_scheduler_t *scheduler)
4917 {
4918     const char *cluster_name = NULL;
4919     const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID);
4920     const pcmk_rule_input_t rule_input = {
4921         .now = scheduler->priv->now,
4922     };
4923 
4924     pcmk__insert_dup(node->priv->attrs,
4925                      CRM_ATTR_UNAME, node->priv->name);
4926 
4927     pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);
4928 
4929     if ((scheduler->dc_node == NULL)
4930         && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {
4931 
4932         scheduler->dc_node = node;
4933         pcmk__insert_dup(node->priv->attrs,
4934                          CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
4935 
4936     } else if (!pcmk__same_node(node, scheduler->dc_node)) {
4937         pcmk__insert_dup(node->priv->attrs,
4938                          CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
4939     }
4940 
4941     cluster_name = g_hash_table_lookup(scheduler->priv->options,
4942                                        PCMK_OPT_CLUSTER_NAME);
4943     if (cluster_name) {
4944         pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
4945                          cluster_name);
4946     }
4947 
4948     if (overwrite) {
4949         /* @TODO Try to reorder some unpacking so that we don't need the
4950          * overwrite argument or to unpack into a temporary table
4951          */
4952         GHashTable *unpacked = pcmk__strkey_table(free, free);
4953 
4954         pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
4955                                    &rule_input, unpacked, NULL, scheduler);
4956         g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
4957         g_hash_table_destroy(unpacked);
4958 
4959     } else {
4960         pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
4961                                    &rule_input, node->priv->attrs, NULL,
4962                                    scheduler);
4963     }
4964 
4965     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_input,
4966                                node->priv->utilization, NULL, scheduler);
4967 
4968     if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
4969                         pcmk__rsc_node_current) == NULL) {
4970         const char *site_name = pcmk__node_attr(node, "site-name", NULL,
4971                                                 pcmk__rsc_node_current);
4972 
4973         if (site_name) {
4974             pcmk__insert_dup(node->priv->attrs,
4975                              CRM_ATTR_SITE_NAME, site_name);
4976 
4977         } else if (cluster_name) {
4978             /* Default to cluster-name if unset */
4979             pcmk__insert_dup(node->priv->attrs,
4980                              CRM_ATTR_SITE_NAME, cluster_name);
4981         }
4982     }
4983 }
4984 
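/*!
 * \internal
 * \brief Extract a resource's action history entries from its status XML
 *
 * \param[in]     node           Name of node that the history is for
 * \param[in]     rsc            ID of resource that the history is for
 * \param[in,out] rsc_entry      Resource's \c PCMK__XE_LRM_RESOURCE XML
 * \param[in]     active_filter  If true, return only active operations
 *
 * \return List of history entry XML, sorted by call ID
 */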
4985 static GList *
4986 extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter)
4987 {
4988     int counter = -1;
4989     int stop_index = -1;
4990     int start_index = -1;
4991 
4992     xmlNode *rsc_op = NULL;
4993 
4994     GList *gIter = NULL;
4995     GList *op_list = NULL;
4996     GList *sorted_op_list = NULL;
4997 
4998     /* extract operations */
4999     op_list = NULL;
5000     sorted_op_list = NULL;
5001 
5002     for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
5003                                        NULL);
5004          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5005 
5006         crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5007         crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5008         op_list = g_list_prepend(op_list, rsc_op);
5009     }
5010 
5011     if (op_list == NULL) {
5012         /* if there are no operations, there is nothing to do */
5013         return NULL;
5014     }
5015 
5016     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5017 
5018     /* unless filtering was requested, return all operations */
5019     if (active_filter == FALSE) {
5020         return sorted_op_list;
5021     }
5022 
5023     op_list = NULL;
5024 
5025     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5026 
5027     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5028         xmlNode *rsc_op = (xmlNode *) gIter->data;
5029 
5030         counter++;
5031 
5032         if (start_index < stop_index) {
5033             crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5034             break;
5035 
5036         } else if (counter < start_index) {
5037             crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5038             continue;
5039         }
5040         op_list = g_list_append(op_list, rsc_op);
5041     }
5042 
5043     g_list_free(sorted_op_list);
5044     return op_list;
5045 }
5046 
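/*!
 * \brief Find action history entries in scheduler data, optionally filtered
 *
 * \param[in]     rsc            If not NULL, include only this resource's history
 * \param[in]     node           If not NULL, include only this node's history
 * \param[in]     active_filter  If true, return only active operations
 * \param[in,out] scheduler      Scheduler data
 *
 * \return List of matching history entry XML
 *
 * \note As a side effect, this determines the online status of each node it
 *       visits.
 */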
5047 GList *
5048 find_operations(const char *rsc, const char *node, gboolean active_filter,
5049                 pcmk_scheduler_t *scheduler)
5050 {
5051     GList *output = NULL;
5052     GList *intermediate = NULL;
5053 
5054     xmlNode *tmp = NULL;
5055     xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5056                                            NULL, NULL);
5057 
5058     pcmk_node_t *this_node = NULL;
5059 
5060     xmlNode *node_state = NULL;
5061 
5062     CRM_CHECK(status != NULL, return NULL);
5063 
5064     for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
5065                                            NULL);
5066          node_state != NULL;
5067          node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {
5068 
5069         const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5070 
5071         if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5072             continue;
5073         }
5074 
5075         this_node = pcmk_find_node(scheduler, uname);
5076         if (this_node == NULL) {
5077             CRM_LOG_ASSERT(this_node != NULL);
5078             continue;
5079 
5080         } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5081             determine_remote_online_status(scheduler, this_node);
5082 
5083         } else {
5084             determine_online_status(node_state, this_node, scheduler);
5085         }
5086 
5087         if (this_node->details->online
5088             || pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
5089             /* Offline nodes run no resources, unless fencing is enabled,
5090              * in which case we need to make sure resource start events
5091              * happen after the fencing
5092              */
5093             xmlNode *lrm_rsc = NULL;
5094 
5095             tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5096                                        NULL);
5097             tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
5098                                        NULL);
5099 
5100             for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
5101                                                 NULL, NULL);
5102                  lrm_rsc != NULL;
5103                  lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5104 
5105                 const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID);
5106 
5107                 if ((rsc != NULL)
5108                     && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
5109                     continue;
5110                 }
5111 
5112                 intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5113                 output = g_list_concat(output, intermediate);
5114             }
5115         }
5116     }
5117 
5118     return output;
5119 }
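
/* An illustrative usage sketch (not compiled): collect the active operation
 * history for a hypothetical resource "db" on node "node1", assuming scheduler
 * data has already been unpacked from a CIB. The returned entries point into
 * scheduler->input, so only the list itself is freed.
 */
#if 0
static void
find_operations_example(pcmk_scheduler_t *scheduler)
{
    GList *ops = find_operations("db", "node1", TRUE, scheduler);
    GList *iter = NULL;

    for (iter = ops; iter != NULL; iter = iter->next) {
        xmlNode *op = iter->data;

        crm_debug("Found history entry %s", pcmk__xe_id(op));
    }
    g_list_free(ops);
}
#endif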