root/lib/pengine/unpack.c


DEFINITIONS

This source file includes the following definitions.
  1. is_dangling_guest_node
  2. pe_fence_node
  3. set_if_xpath
  4. unpack_config
  5. pe_create_node
  6. expand_remote_rsc_meta
  7. handle_startup_fencing
  8. unpack_nodes
  9. setup_container
  10. unpack_remote_nodes
  11. link_rsc2remotenode
  12. destroy_tag
  13. unpack_resources
  14. pcmk__unpack_fencing_topology
  15. unpack_tags
  16. unpack_ticket_state
  17. unpack_tickets_state
  18. unpack_handle_remote_attrs
  19. unpack_transient_attributes
  20. unpack_node_state
  21. unpack_node_history
  22. unpack_status
  23. unpack_node_member
  24. unpack_node_online
  25. unpack_node_terminate
  26. determine_online_status_no_fencing
  27. pending_too_long
  28. determine_online_status_fencing
  29. determine_remote_online_status
  30. determine_online_status
  31. pe_base_name_end
  32. clone_strip
  33. clone_zero
  34. create_fake_resource
  35. create_anonymous_orphan
  36. find_anonymous_clone
  37. unpack_find_resource
  38. process_orphan_resource
  39. process_rsc_state
  40. process_recurring
  41. calculate_active_ops
  42. unpack_shutdown_lock
  43. unpack_lrm_resource
  44. handle_orphaned_container_fillers
  45. unpack_node_lrm
  46. set_active
  47. set_node_score
  48. find_lrm_op
  49. find_lrm_resource
  50. unknown_on_node
  51. monitor_not_running_after
  52. non_monitor_after
  53. newer_state_after_migrate
  54. get_migration_node_names
  55. add_dangling_migration
  56. unpack_migrate_to_success
  57. unpack_migrate_to_failure
  58. unpack_migrate_from_failure
  59. record_failed_op
  60. last_change_str
  61. cmp_on_fail
  62. ban_from_all_nodes
  63. unpack_failure_handling
  64. unpack_rsc_op_failure
  65. block_if_unrecoverable
  66. remap_because
  67. remap_operation
  68. should_clear_for_param_change
  69. order_after_remote_fencing
  70. should_ignore_failure_timeout
  71. check_operation_expiry
  72. pe__target_rc_from_xml
  73. update_resource_state
  74. can_affect_state
  75. unpack_action_result
  76. process_expired_result
  77. mask_probe_failure
  78. failure_is_newer
  79. process_pending_action
  80. unpack_rsc_op
  81. add_node_attrs
  82. extract_operations
  83. find_operations

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdio.h>
  13 #include <string.h>
  14 #include <glib.h>
  15 #include <time.h>
  16 
  17 #include <crm/crm.h>
  18 #include <crm/services.h>
  19 #include <crm/common/xml.h>
  20 #include <crm/common/xml_internal.h>
  21 
  22 #include <crm/common/util.h>
  23 #include <crm/pengine/rules.h>
  24 #include <crm/pengine/internal.h>
  25 #include <pe_status_private.h>
  26 
  27 CRM_TRACE_INIT_DATA(pe_status);
  28 
  29 // A (parsed) resource action history entry
  30 struct action_history {
  31     pcmk_resource_t *rsc;       // Resource that history is for
  32     pcmk_node_t *node;        // Node that history is for
  33     xmlNode *xml;             // History entry XML
  34 
  35     // Parsed from entry XML
  36     const char *id;           // XML ID of history entry
  37     const char *key;          // Operation key of action
  38     const char *task;         // Action name
  39     const char *exit_reason;  // Exit reason given for result
  40     guint interval_ms;        // Action interval
  41     int call_id;              // Call ID of action
  42     int expected_exit_status; // Expected exit status of action
  43     int exit_status;          // Actual exit status of action
  44     int execution_status;     // Execution status of action
  45 };
  46 
  47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
  48  * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
  49  * flag is stringified more readably in log messages.
  50  */
  51 #define set_config_flag(scheduler, option, flag) do {                         \
  52         GHashTable *config_hash = (scheduler)->config_hash;                   \
  53         const char *scf_value = pcmk__cluster_option(config_hash, (option));  \
  54                                                                               \
  55         if (scf_value != NULL) {                                              \
  56             if (crm_is_true(scf_value)) {                                     \
  57                 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,   \
  58                                     LOG_TRACE, "Scheduler",                   \
  59                                     crm_system_name, (scheduler)->flags,      \
  60                                     (flag), #flag);                           \
  61             } else {                                                          \
  62                 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
  63                                     LOG_TRACE, "Scheduler",                   \
  64                                     crm_system_name, (scheduler)->flags,      \
  65                                     (flag), #flag);                           \
  66             }                                                                 \
  67         }                                                                     \
  68     } while(0)
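
      /* Illustrative usage (taken from unpack_config() below):
       *   set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
       *                   pcmk_sched_probe_resources);
       * sets or clears pcmk_sched_probe_resources in scheduler->flags according
       * to the option's boolean value, logging the flag name readably.
       */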
  69 
  70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
  71                           xmlNode *xml_op, xmlNode **last_failure,
  72                           enum action_fail_response *failed);
  73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
  74                                            pcmk_node_t *this_node);
  75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
  76                            bool overwrite, pcmk_scheduler_t *scheduler);
  77 static void determine_online_status(const xmlNode *node_state,
  78                                     pcmk_node_t *this_node,
  79                                     pcmk_scheduler_t *scheduler);
  80 
  81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
  82                             pcmk_scheduler_t *scheduler);
  83 
  84 
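      /*!
       * \internal
       * \brief Check whether a node is a dangling guest node
       *
       * \param[in] node  Node to check
       *
       * \return TRUE if \p node is a Pacemaker Remote node whose connection
       *         resource's container has been removed, otherwise FALSE
       */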
  85 static gboolean
  86 is_dangling_guest_node(pcmk_node_t *node)
  87 {
   88     /* We are looking for a remote node that was supposed to be mapped to a
   89      * container resource, but all traces of that container have disappeared
   90      * from both the config and the status section. */
  91     if (pcmk__is_pacemaker_remote_node(node)
  92         && (node->details->remote_rsc != NULL)
  93         && (node->details->remote_rsc->container == NULL)
  94         && pcmk_is_set(node->details->remote_rsc->flags,
  95                        pcmk_rsc_removed_filler)) {
  96         return TRUE;
  97     }
  98 
  99     return FALSE;
 100 }
 101 
 102 /*!
 103  * \brief Schedule a fence action for a node
 104  *
 105  * \param[in,out] scheduler       Scheduler data
 106  * \param[in,out] node            Node to fence
 107  * \param[in]     reason          Text description of why fencing is needed
 108  * \param[in]     priority_delay  Whether to consider
 109  *                                \c PCMK_OPT_PRIORITY_FENCING_DELAY
 110  */
 111 void
 112 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
 113               const char *reason, bool priority_delay)
 114 {
 115     CRM_CHECK(node, return);
 116 
 117     /* A guest node is fenced by marking its container as failed */
 118     if (pcmk__is_guest_or_bundle_node(node)) {
 119         pcmk_resource_t *rsc = node->details->remote_rsc->container;
 120 
 121         if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 122             if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 123                 crm_notice("Not fencing guest node %s "
 124                            "(otherwise would because %s): "
 125                            "its guest resource %s is unmanaged",
 126                            pcmk__node_name(node), reason, rsc->id);
 127             } else {
 128                 pcmk__sched_warn("Guest node %s will be fenced "
 129                                  "(by recovering its guest resource %s): %s",
 130                                  pcmk__node_name(node), rsc->id, reason);
 131 
 132                 /* We don't mark the node as unclean because that would prevent the
 133                  * node from running resources. We want to allow it to run resources
 134                  * in this transition if the recovery succeeds.
 135                  */
 136                 node->details->remote_requires_reset = TRUE;
 137                 pcmk__set_rsc_flags(rsc,
 138                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 139             }
 140         }
 141 
 142     } else if (is_dangling_guest_node(node)) {
 143         crm_info("Cleaning up dangling connection for guest node %s: "
 144                  "fencing was already done because %s, "
 145                  "and guest resource no longer exists",
 146                  pcmk__node_name(node), reason);
 147         pcmk__set_rsc_flags(node->details->remote_rsc,
 148                             pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 149 
 150     } else if (pcmk__is_remote_node(node)) {
 151         pcmk_resource_t *rsc = node->details->remote_rsc;
 152 
 153         if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 154             crm_notice("Not fencing remote node %s "
 155                        "(otherwise would because %s): connection is unmanaged",
 156                        pcmk__node_name(node), reason);
  157         } else if (node->details->remote_requires_reset == FALSE) {
 158             node->details->remote_requires_reset = TRUE;
 159             pcmk__sched_warn("Remote node %s %s: %s",
 160                              pcmk__node_name(node),
 161                              pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 162                              reason);
 163         }
 164         node->details->unclean = TRUE;
 165         // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
 166         pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
 167 
 168     } else if (node->details->unclean) {
 169         crm_trace("Cluster node %s %s because %s",
 170                   pcmk__node_name(node),
 171                   pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
 172                   reason);
 173 
 174     } else {
 175         pcmk__sched_warn("Cluster node %s %s: %s",
 176                          pcmk__node_name(node),
 177                          pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 178                          reason);
 179         node->details->unclean = TRUE;
 180         pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
 181     }
 182 }
 183 
 184 // @TODO xpaths can't handle templates, rules, or id-refs
 185 
 186 // nvpair with provides or requires set to unfencing
 187 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR           \
 188     "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'"   \
 189     "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "   \
 190     "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
 191 
 192 // unfencing in rsc_defaults or any resource
 193 #define XPATH_ENABLE_UNFENCING \
 194     "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
 195     "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR             \
 196     "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
 197     "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
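
      /* As an illustration (attribute names per the constants above), an entry
       * roughly like
       *   <nvpair id="..." name="requires" value="unfencing"/>
       * inside a resource's (or rsc_defaults') meta_attributes would match
       * XPATH_ENABLE_UNFENCING and cause set_if_xpath() to set
       * pcmk_sched_enable_unfencing.
       */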
 198 
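      /*!
       * \internal
       * \brief Set a scheduler flag if an XPath search of the input CIB matches
       *
       * \param[in]     flag       Scheduler flag to set
       * \param[in]     xpath      XPath expression to search for
       * \param[in,out] scheduler  Scheduler data
       */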
 199 static void
 200 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
 201 {
 202     xmlXPathObjectPtr result = NULL;
 203 
 204     if (!pcmk_is_set(scheduler->flags, flag)) {
 205         result = xpath_search(scheduler->input, xpath);
 206         if (result && (numXpathResults(result) > 0)) {
 207             pcmk__set_scheduler_flags(scheduler, flag);
 208         }
 209         freeXpathObject(result);
 210     }
 211 }
 212 
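      /*!
       * \brief Unpack cluster options from the CIB configuration section
       *
       * \param[in,out] config     XML of cluster configuration options
       * \param[in,out] scheduler  Scheduler data
       *
       * \return TRUE (this function currently always succeeds)
       */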
 213 gboolean
 214 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
 215 {
 216     const char *value = NULL;
 217     guint interval_ms = 0U;
 218     GHashTable *config_hash = pcmk__strkey_table(free, free);
 219 
 220     pe_rule_eval_data_t rule_data = {
 221         .node_hash = NULL,
 222         .now = scheduler->now,
 223         .match_data = NULL,
 224         .rsc_data = NULL,
 225         .op_data = NULL
 226     };
 227 
 228     scheduler->config_hash = config_hash;
 229 
 230     pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
 231                                config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
 232                                FALSE, scheduler);
 233 
 234     pcmk__validate_cluster_options(config_hash);
 235 
 236     set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
 237                     pcmk_sched_probe_resources);
 238     if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
 239         crm_info("Startup probes: disabled (dangerous)");
 240     }
 241 
 242     value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
 243     if (value && crm_is_true(value)) {
 244         crm_info("Watchdog-based self-fencing will be performed via SBD if "
 245                  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
 246                  " is nonzero");
 247         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_fencing);
 248     }
 249 
 250     /* Set certain flags via xpath here, so they can be used before the relevant
 251      * configuration sections are unpacked.
 252      */
 253     set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
 254                  scheduler);
 255 
 256     value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
 257     pcmk_parse_interval_spec(value, &interval_ms);
 258 
 259     if (interval_ms >= INT_MAX) {
 260         scheduler->stonith_timeout = INT_MAX;
 261     } else {
 262         scheduler->stonith_timeout = (int) interval_ms;
 263     }
 264     crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
 265 
 266     set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
 267                     pcmk_sched_fencing_enabled);
 268     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 269         crm_debug("STONITH of failed nodes is enabled");
 270     } else {
 271         crm_debug("STONITH of failed nodes is disabled");
 272     }
 273 
 274     scheduler->stonith_action = pcmk__cluster_option(config_hash,
 275                                                      PCMK_OPT_STONITH_ACTION);
 276     if (!strcmp(scheduler->stonith_action, PCMK__ACTION_POWEROFF)) {
 277         pcmk__warn_once(pcmk__wo_poweroff,
 278                         "Support for " PCMK_OPT_STONITH_ACTION " of "
 279                         "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
 280                         "removed in a future release "
 281                         "(use '" PCMK_ACTION_OFF "' instead)");
 282         scheduler->stonith_action = PCMK_ACTION_OFF;
 283     }
 284     crm_trace("STONITH will %s nodes", scheduler->stonith_action);
 285 
 286     set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
 287                     pcmk_sched_concurrent_fencing);
 288     if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
 289         crm_debug("Concurrent fencing is enabled");
 290     } else {
 291         crm_debug("Concurrent fencing is disabled");
 292     }
 293 
 294     value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
 295     if (value) {
 296         pcmk_parse_interval_spec(value, &interval_ms);
 297         scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
 298         crm_trace("Priority fencing delay is %ds",
 299                   scheduler->priority_fencing_delay);
 300     }
 301 
 302     set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
 303                     pcmk_sched_stop_all);
 304     crm_debug("Stop all active resources: %s",
 305               pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
 306 
 307     set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
 308                     pcmk_sched_symmetric_cluster);
 309     if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
  310         crm_debug("Cluster is symmetric - resources can run anywhere by default");
 311     }
 312 
 313     value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
 314 
 315     if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
 316         scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
 317 
 318     } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
 319         scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
 320 
 321     } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
 322         scheduler->no_quorum_policy = pcmk_no_quorum_demote;
 323 
 324     } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) {
 325         if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 326             int do_panic = 0;
 327 
 328             crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
 329                                   &do_panic);
 330             if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
 331                 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
 332             } else {
 333                 crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 334                            " to 'stop': cluster has never had quorum");
 335                 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 336             }
 337         } else {
 338             pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 339                              " to 'stop' because fencing is disabled");
 340             scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 341         }
 342 
 343     } else {
 344         scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 345     }
 346 
 347     switch (scheduler->no_quorum_policy) {
 348         case pcmk_no_quorum_freeze:
 349             crm_debug("On loss of quorum: Freeze resources");
 350             break;
 351         case pcmk_no_quorum_stop:
 352             crm_debug("On loss of quorum: Stop ALL resources");
 353             break;
 354         case pcmk_no_quorum_demote:
 355             crm_debug("On loss of quorum: "
 356                       "Demote promotable resources and stop other resources");
 357             break;
 358         case pcmk_no_quorum_fence:
 359             crm_notice("On loss of quorum: Fence all remaining nodes");
 360             break;
 361         case pcmk_no_quorum_ignore:
 362             crm_notice("On loss of quorum: Ignore");
 363             break;
 364     }
 365 
 366     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
 367                     pcmk_sched_stop_removed_resources);
 368     if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
 369         crm_trace("Orphan resources are stopped");
 370     } else {
 371         crm_trace("Orphan resources are ignored");
 372     }
 373 
 374     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
 375                     pcmk_sched_cancel_removed_actions);
 376     if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
 377         crm_trace("Orphan resource actions are stopped");
 378     } else {
 379         crm_trace("Orphan resource actions are ignored");
 380     }
 381 
 382     value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
 383     if (value != NULL) {
 384         if (crm_is_true(value)) {
 385             pcmk__set_scheduler_flags(scheduler, pcmk_sched_remove_after_stop);
 386             pcmk__warn_once(pcmk__wo_remove_after,
 387                             "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
 388                             " cluster property is deprecated and will be "
 389                             "removed in a future release");
 390         } else {
 391             pcmk__clear_scheduler_flags(scheduler,
 392                                         pcmk_sched_remove_after_stop);
 393         }
 394     }
 395 
 396     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 397                     pcmk_sched_in_maintenance);
 398     crm_trace("Maintenance mode: %s",
 399               pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
 400 
 401     set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
 402                     pcmk_sched_start_failure_fatal);
 403     if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
 404         crm_trace("Start failures are always fatal");
 405     } else {
 406         crm_trace("Start failures are handled by failcount");
 407     }
 408 
 409     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 410         set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
 411                         pcmk_sched_startup_fencing);
 412     }
 413     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
 414         crm_trace("Unseen nodes will be fenced");
 415     } else {
 416         pcmk__warn_once(pcmk__wo_blind,
 417                         "Blind faith: not fencing unseen nodes");
 418     }
 419 
 420     pe__unpack_node_health_scores(scheduler);
 421 
 422     scheduler->placement_strategy =
 423         pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
 424     crm_trace("Placement strategy: %s", scheduler->placement_strategy);
 425 
 426     set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
 427                     pcmk_sched_shutdown_lock);
 428     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
 429         value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
 430         pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock));
 431         scheduler->shutdown_lock /= 1000;
 432         crm_trace("Resources will be locked to nodes that were cleanly "
 433                   "shut down (locks expire after %s)",
 434                   pcmk__readable_interval(scheduler->shutdown_lock));
 435     } else {
 436         crm_trace("Resources will not be locked to nodes that were cleanly "
 437                   "shut down");
 438     }
 439 
 440     value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
 441     pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout));
 442     scheduler->node_pending_timeout /= 1000;
 443     if (scheduler->node_pending_timeout == 0) {
 444         crm_trace("Do not fence pending nodes");
 445     } else {
 446         crm_trace("Fence pending nodes after %s",
 447                   pcmk__readable_interval(scheduler->node_pending_timeout
 448                                           * 1000));
 449     }
 450 
 451     return TRUE;
 452 }
 453 
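      /*!
       * \brief Create a new node object and add it to the scheduler's node list
       *
       * \param[in]     id         Node ID
       * \param[in]     uname      Node name
       * \param[in]     type       Node type ("member", "remote", or "ping")
       * \param[in]     score      Node weight as a score string
       * \param[in,out] scheduler  Scheduler data
       *
       * \return Newly created node object, or NULL if memory allocation fails
       */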
 454 pcmk_node_t *
 455 pe_create_node(const char *id, const char *uname, const char *type,
 456                const char *score, pcmk_scheduler_t *scheduler)
 457 {
 458     pcmk_node_t *new_node = NULL;
 459 
 460     if (pcmk_find_node(scheduler, uname) != NULL) {
 461         pcmk__config_warn("More than one node entry has name '%s'", uname);
 462     }
 463 
 464     new_node = calloc(1, sizeof(pcmk_node_t));
 465     if (new_node == NULL) {
 466         pcmk__sched_err("Could not allocate memory for node %s", uname);
 467         return NULL;
 468     }
 469 
 470     new_node->weight = char2score(score);
 471     new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
 472 
 473     if (new_node->details == NULL) {
 474         free(new_node);
 475         pcmk__sched_err("Could not allocate memory for node %s", uname);
 476         return NULL;
 477     }
 478 
 479     crm_trace("Creating node for entry %s/%s", uname, id);
 480     new_node->details->id = id;
 481     new_node->details->uname = uname;
 482     new_node->details->online = FALSE;
 483     new_node->details->shutdown = FALSE;
 484     new_node->details->rsc_discovery_enabled = TRUE;
 485     new_node->details->running_rsc = NULL;
 486     new_node->details->data_set = scheduler;
 487 
 488     if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
 489                      pcmk__str_null_matches|pcmk__str_casei)) {
 490         new_node->details->type = pcmk_node_variant_cluster;
 491 
 492     } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
 493         new_node->details->type = pcmk_node_variant_remote;
 494         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_remote_nodes);
 495 
 496     } else {
 497         /* @COMPAT 'ping' is the default for backward compatibility, but it
 498          * should be changed to 'member' at a compatibility break
 499          */
 500         if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
 501             pcmk__config_warn("Node %s has unrecognized type '%s', "
 502                               "assuming '" PCMK__VALUE_PING "'",
 503                               pcmk__s(uname, "without name"), type);
 504         }
 505         pcmk__warn_once(pcmk__wo_ping_node,
 506                         "Support for nodes of type '" PCMK__VALUE_PING "' "
 507                         "(such as %s) is deprecated and will be removed in a "
 508                         "future release",
 509                         pcmk__s(uname, "unnamed node"));
 510         new_node->details->type = node_ping;
 511     }
 512 
 513     new_node->details->attrs = pcmk__strkey_table(free, free);
 514 
 515     if (pcmk__is_pacemaker_remote_node(new_node)) {
 516         pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
 517     } else {
 518         pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
 519     }
 520 
 521     new_node->details->utilization = pcmk__strkey_table(free, free);
 522     new_node->details->digest_cache = pcmk__strkey_table(free,
 523                                                           pe__free_digests);
 524 
 525     scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
 526                                             pe__cmp_node_name);
 527     return new_node;
 528 }
 529 
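      /*!
       * \internal
       * \brief Expand a guest node definition from a resource's meta-attributes
       *
       * If a resource's meta-attributes include PCMK_META_REMOTE_NODE, and no
       * resource with that name already exists, add an implicit
       * ocf:pacemaker:remote connection resource for the guest node to the
       * configuration.
       *
       * \param[in,out] xml_obj  Resource XML to check
       * \param[in,out] parent   Resources section XML to add any new resource to
       * \param[in,out] data     Scheduler data
       *
       * \return Guest node name if one was defined, otherwise NULL
       */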
 530 static const char *
 531 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
 532 {
 533     xmlNode *attr_set = NULL;
 534     xmlNode *attr = NULL;
 535 
 536     const char *container_id = pcmk__xe_id(xml_obj);
 537     const char *remote_name = NULL;
 538     const char *remote_server = NULL;
 539     const char *remote_port = NULL;
 540     const char *connect_timeout = "60s";
  541     const char *remote_allow_migrate = NULL;
 542     const char *is_managed = NULL;
 543 
 544     for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
 545          attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
 546 
 547         if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
 548             continue;
 549         }
 550 
 551         for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
 552              attr != NULL; attr = pcmk__xe_next(attr)) {
 553 
 554             const char *value = crm_element_value(attr, PCMK_XA_VALUE);
 555             const char *name = crm_element_value(attr, PCMK_XA_NAME);
 556 
 557             if (name == NULL) { // Sanity
 558                 continue;
 559             }
 560 
 561             if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
 562                 remote_name = value;
 563 
 564             } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
 565                 remote_server = value;
 566 
 567             } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
 568                 remote_port = value;
 569 
 570             } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
 571                 connect_timeout = value;
 572 
 573             } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
 574                 remote_allow_migrate = value;
 575 
 576             } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
 577                 is_managed = value;
 578             }
 579         }
 580     }
 581 
 582     if (remote_name == NULL) {
 583         return NULL;
 584     }
 585 
 586     if (pe_find_resource(data->resources, remote_name) != NULL) {
 587         return NULL;
 588     }
 589 
 590     pe_create_remote_xml(parent, remote_name, container_id,
 591                          remote_allow_migrate, is_managed,
 592                          connect_timeout, remote_server, remote_port);
 593     return remote_name;
 594 }
 595 
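      /*!
       * \internal
       * \brief Apply the startup fencing policy to a newly created node
       *
       * Mark the node as unclean if startup fencing is enabled (clean
       * otherwise), and as unseen until a status entry is found for it. Remote
       * nodes without a connection resource are left untouched.
       *
       * \param[in,out] scheduler  Scheduler data
       * \param[in,out] new_node   Node to update
       */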
 596 static void
 597 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
 598 {
 599     if ((new_node->details->type == pcmk_node_variant_remote)
 600         && (new_node->details->remote_rsc == NULL)) {
 601         /* Ignore fencing for remote nodes that don't have a connection resource
 602          * associated with them. This happens when remote node entries get left
 603          * in the nodes section after the connection resource is removed.
 604          */
 605         return;
 606     }
 607 
 608     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
 609         // All nodes are unclean until we've seen their status entry
 610         new_node->details->unclean = TRUE;
 611 
 612     } else {
 613         // Blind faith ...
 614         new_node->details->unclean = FALSE;
 615     }
 616 
  617     /* We need to be able to determine whether a node's status section
  618      * exists, separately from whether the node is unclean. */
 619     new_node->details->unseen = TRUE;
 620 }
 621 
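      /*!
       * \brief Create node objects from the CIB nodes section
       *
       * \param[in,out] xml_nodes  XML of the CIB's nodes section
       * \param[in,out] scheduler  Scheduler data
       *
       * \return TRUE on success, otherwise FALSE
       */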
 622 gboolean
 623 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
 624 {
 625     xmlNode *xml_obj = NULL;
 626     pcmk_node_t *new_node = NULL;
 627     const char *id = NULL;
 628     const char *uname = NULL;
 629     const char *type = NULL;
 630     const char *score = NULL;
 631 
 632     for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
 633          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
 634 
 635         if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
 636             new_node = NULL;
 637 
 638             id = crm_element_value(xml_obj, PCMK_XA_ID);
 639             uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
 640             type = crm_element_value(xml_obj, PCMK_XA_TYPE);
 641             score = crm_element_value(xml_obj, PCMK_XA_SCORE);
 642             crm_trace("Processing node %s/%s", uname, id);
 643 
 644             if (id == NULL) {
 645                 pcmk__config_err("Ignoring <" PCMK_XE_NODE
 646                                  "> entry in configuration without id");
 647                 continue;
 648             }
 649             new_node = pe_create_node(id, uname, type, score, scheduler);
 650 
 651             if (new_node == NULL) {
 652                 return FALSE;
 653             }
 654 
 655             handle_startup_fencing(scheduler, new_node);
 656 
 657             add_node_attrs(xml_obj, new_node, FALSE, scheduler);
 658 
 659             crm_trace("Done with node %s",
 660                       crm_element_value(xml_obj, PCMK_XA_UNAME));
 661         }
 662     }
 663 
 664     if (scheduler->localhost
 665         && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) {
 666         crm_info("Creating a fake local node");
 667         pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
 668                        scheduler);
 669     }
 670 
 671     return TRUE;
 672 }
 673 
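      /*!
       * \internal
       * \brief Link a resource to its container resource, if any
       *
       * If the resource (or, recursively, any of its children) has the
       * PCMK__META_CONTAINER meta-attribute, point rsc->container at that
       * resource and add the resource to the container's list of fillers.
       *
       * \param[in,out] rsc        Resource to check
       * \param[in,out] scheduler  Scheduler data
       */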
 674 static void
 675 setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
 676 {
 677     const char *container_id = NULL;
 678 
 679     if (rsc->children) {
 680         g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
 681         return;
 682     }
 683 
 684     container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
 685     if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
 686         pcmk_resource_t *container = pe_find_resource(scheduler->resources,
 687                                                       container_id);
 688 
 689         if (container) {
 690             rsc->container = container;
 691             pcmk__set_rsc_flags(container, pcmk_rsc_has_filler);
 692             container->fillers = g_list_append(container->fillers, rsc);
 693             pcmk__rsc_trace(rsc, "Resource %s's container is %s",
 694                             rsc->id, container_id);
 695         } else {
 696             pcmk__config_err("Resource %s: Unknown resource container (%s)",
 697                              rsc->id, container_id);
 698         }
 699     }
 700 }
 701 
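      /*!
       * \brief Create node objects for Pacemaker Remote and guest nodes
       *
       * Scan the resource configuration for ocf:pacemaker:remote primitives and
       * for primitives (possibly inside groups) with guest node meta-attributes,
       * and create a node object for each such node found.
       *
       * \param[in,out] xml_resources  XML of the CIB's resources section
       * \param[in,out] scheduler      Scheduler data
       *
       * \return TRUE (this function currently always succeeds)
       */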
 702 gboolean
 703 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 704 {
 705     xmlNode *xml_obj = NULL;
 706 
 707     /* Create remote nodes and guest nodes from the resource configuration
 708      * before unpacking resources.
 709      */
 710     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 711          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
 712 
 713         const char *new_node_id = NULL;
 714 
 715         /* Check for remote nodes, which are defined by ocf:pacemaker:remote
 716          * primitives.
 717          */
 718         if (xml_contains_remote_node(xml_obj)) {
 719             new_node_id = pcmk__xe_id(xml_obj);
 720             /* The pcmk_find_node() check ensures we don't iterate over an
 721              * expanded node that has already been added to the node list
 722              */
 723             if (new_node_id
 724                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 725                 crm_trace("Found remote node %s defined by resource %s",
 726                           new_node_id, pcmk__xe_id(xml_obj));
 727                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 728                                NULL, scheduler);
 729             }
 730             continue;
 731         }
 732 
 733         /* Check for guest nodes, which are defined by special meta-attributes
 734          * of a primitive of any type (for example, VirtualDomain or Xen).
 735          */
 736         if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
 737             /* This will add an ocf:pacemaker:remote primitive to the
 738              * configuration for the guest node's connection, to be unpacked
 739              * later.
 740              */
 741             new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
 742                                                  scheduler);
 743             if (new_node_id
 744                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 745                 crm_trace("Found guest node %s in resource %s",
 746                           new_node_id, pcmk__xe_id(xml_obj));
 747                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 748                                NULL, scheduler);
 749             }
 750             continue;
 751         }
 752 
 753         /* Check for guest nodes inside a group. Clones are currently not
 754          * supported as guest nodes.
 755          */
 756         if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
 757             xmlNode *xml_obj2 = NULL;
 758             for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
 759                  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
 760 
 761                 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
 762                                                      scheduler);
 763 
 764                 if (new_node_id
 765                     && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 766                     crm_trace("Found guest node %s in resource %s inside group %s",
 767                               new_node_id, pcmk__xe_id(xml_obj2),
 768                               pcmk__xe_id(xml_obj));
 769                     pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 770                                    NULL, scheduler);
 771                 }
 772             }
 773         }
 774     }
 775     return TRUE;
 776 }
 777 
 778 /* Call this after all the nodes and resources have been
 779  * unpacked, but before the status section is read.
 780  *
 781  * A remote node's online status is reflected by the state
 782  * of the remote node's connection resource. We need to link
 783  * the remote node to this connection resource so we can have
 784  * easy access to the connection resource during the scheduler calculations.
 785  */
 786 static void
 787 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
 788 {
 789     pcmk_node_t *remote_node = NULL;
 790 
 791     if (new_rsc->is_remote_node == FALSE) {
 792         return;
 793     }
 794 
 795     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
 796         /* remote_nodes and remote_resources are not linked in quick location calculations */
 797         return;
 798     }
 799 
 800     remote_node = pcmk_find_node(scheduler, new_rsc->id);
 801     CRM_CHECK(remote_node != NULL, return);
 802 
 803     pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
 804                     new_rsc->id, pcmk__node_name(remote_node));
 805     remote_node->details->remote_rsc = new_rsc;
 806 
 807     if (new_rsc->container == NULL) {
 808         /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
 809          * the same as is done for cluster nodes.
 810          */
 811         handle_startup_fencing(scheduler, remote_node);
 812 
 813     } else {
 814         /* pe_create_node() marks the new node as "remote" or "cluster"; now
 815          * that we know the node is a guest node, update it correctly.
 816          */
 817         pcmk__insert_dup(remote_node->details->attrs,
 818                          CRM_ATTR_KIND, "container");
 819     }
 820 }
 821 
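      /*!
       * \internal
       * \brief Free a tag object (GDestroyNotify for tag hash tables)
       *
       * \param[in,out] data  Tag object to free
       */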
 822 static void
 823 destroy_tag(gpointer data)
 824 {
 825     pcmk_tag_t *tag = data;
 826 
 827     if (tag) {
 828         free(tag->id);
 829         g_list_free_full(tag->refs, free);
 830         free(tag);
 831     }
 832 }
 833 
 834 /*!
 835  * \internal
 836  * \brief Parse configuration XML for resource information
 837  *
 838  * \param[in]     xml_resources  Top of resource configuration XML
 839  * \param[in,out] scheduler      Scheduler data
 840  *
 841  * \return TRUE
 842  *
 843  * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
 844  *       be used when pe__unpack_resource() calls resource_location()
 845  */
 846 gboolean
 847 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 848 {
 849     xmlNode *xml_obj = NULL;
 850     GList *gIter = NULL;
 851 
 852     scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
 853 
 854     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 855          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
 856 
 857         pcmk_resource_t *new_rsc = NULL;
 858         const char *id = pcmk__xe_id(xml_obj);
 859 
 860         if (pcmk__str_empty(id)) {
 861             pcmk__config_err("Ignoring <%s> resource without ID",
 862                              xml_obj->name);
 863             continue;
 864         }
 865 
 866         if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
 867             if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
 868                                              NULL, NULL) == FALSE) {
  869                 /* Record the template's ID so we at least know that it exists. */
 870                 pcmk__insert_dup(scheduler->template_rsc_sets, id, NULL);
 871             }
 872             continue;
 873         }
 874 
 875         crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
 876         if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
 877                                 scheduler) == pcmk_rc_ok) {
 878             scheduler->resources = g_list_append(scheduler->resources, new_rsc);
 879             pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
 880 
 881         } else {
 882             pcmk__config_err("Ignoring <%s> resource '%s' "
 883                              "because configuration is invalid",
 884                              xml_obj->name, id);
 885         }
 886     }
 887 
 888     for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
 889         pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
 890 
 891         setup_container(rsc, scheduler);
 892         link_rsc2remotenode(scheduler, rsc);
 893     }
 894 
 895     scheduler->resources = g_list_sort(scheduler->resources,
 896                                       pe__cmp_rsc_priority);
 897     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
 898         /* Ignore */
 899 
 900     } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
 901                && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
 902 
 903         pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
 904         pcmk__config_err("Either configure some or disable STONITH with the "
 905                          PCMK_OPT_STONITH_ENABLED " option");
 906         pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
 907     }
 908 
 909     return TRUE;
 910 }
 911 
 912 /*!
 913  * \internal
 914  * \brief Parse configuration XML for fencing topology information
 915  *
 916  * \param[in]     xml_fencing_topology  Top of fencing topology configuration XML
 917  * \param[in,out] scheduler             Scheduler data
 918  *
 919  * \return void
 920  */
 921 void
 922 pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
 923 {
 924     xmlNode *xml_obj = NULL;
 925     int id = 0;
 926 
 927     for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL);
 928          xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) {
 929 
 930         crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id);
 931 
 932         // Ensure an ID was given
 933         if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
 934             pcmk__config_warn("Ignoring registration for topology level without ID");
 935             continue;
 936         }
 937 
 938         // Ensure level ID is in allowed range
 939         if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
 940             pcmk__config_warn("Ignoring topology registration with invalid level %d",
 941                                id);
 942             continue;
 943         }
 944 
 945     }
 946 }
 947 
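      /*!
       * \brief Create tag objects from the CIB tags section
       *
       * \param[in,out] xml_tags   XML of the CIB's tags section
       * \param[in,out] scheduler  Scheduler data
       *
       * \return TRUE on success, otherwise FALSE
       */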
 948 gboolean
 949 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
 950 {
 951     xmlNode *xml_tag = NULL;
 952 
 953     scheduler->tags = pcmk__strkey_table(free, destroy_tag);
 954 
 955     for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
 956          xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
 957 
 958         xmlNode *xml_obj_ref = NULL;
 959         const char *tag_id = pcmk__xe_id(xml_tag);
 960 
 961         if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
 962             continue;
 963         }
 964 
 965         if (tag_id == NULL) {
 966             pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
 967                              (const char *) xml_tag->name);
 968             continue;
 969         }
 970 
 971         for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
 972              xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
 973 
 974             const char *obj_ref = pcmk__xe_id(xml_obj_ref);
 975 
 976             if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
 977                 continue;
 978             }
 979 
 980             if (obj_ref == NULL) {
 981                 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
 982                                  xml_obj_ref->name, tag_id);
 983                 continue;
 984             }
 985 
 986             if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
 987                 return FALSE;
 988             }
 989         }
 990     }
 991 
 992     return TRUE;
 993 }
 994 
 995 /* The ticket state section:
 996  * "/cib/status/tickets/ticket_state" */
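      /* As an illustration (the ticket name here is hypothetical), an entry such as
       *   <ticket_state id="ticketA" granted="true" last-granted="1234567890"/>
       * would be unpacked below into a granted pcmk_ticket_t named "ticketA".
       */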
 997 static gboolean
 998 unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
 999 {
1000     const char *ticket_id = NULL;
1001     const char *granted = NULL;
1002     const char *last_granted = NULL;
1003     const char *standby = NULL;
1004     xmlAttrPtr xIter = NULL;
1005 
1006     pcmk_ticket_t *ticket = NULL;
1007 
1008     ticket_id = pcmk__xe_id(xml_ticket);
1009     if (pcmk__str_empty(ticket_id)) {
1010         return FALSE;
1011     }
1012 
1013     crm_trace("Processing ticket state for %s", ticket_id);
1014 
1015     ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
1016     if (ticket == NULL) {
1017         ticket = ticket_new(ticket_id, scheduler);
1018         if (ticket == NULL) {
1019             return FALSE;
1020         }
1021     }
1022 
1023     for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1024         const char *prop_name = (const char *)xIter->name;
1025         const char *prop_value = pcmk__xml_attr_value(xIter);
1026 
1027         if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1028             continue;
1029         }
1030         pcmk__insert_dup(ticket->state, prop_name, prop_value);
1031     }
1032 
1033     granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1034     if (granted && crm_is_true(granted)) {
1035         ticket->granted = TRUE;
1036         crm_info("We have ticket '%s'", ticket->id);
1037     } else {
1038         ticket->granted = FALSE;
1039         crm_info("We do not have ticket '%s'", ticket->id);
1040     }
1041 
1042     last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1043     if (last_granted) {
1044         long long last_granted_ll;
1045 
1046         pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1047         ticket->last_granted = (time_t) last_granted_ll;
1048     }
1049 
1050     standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1051     if (standby && crm_is_true(standby)) {
1052         ticket->standby = TRUE;
1053         if (ticket->granted) {
1054             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1055         }
1056     } else {
1057         ticket->standby = FALSE;
1058     }
1059 
1060     crm_trace("Done with ticket state for %s", ticket_id);
1061 
1062     return TRUE;
1063 }
1064 
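      /*!
       * \internal
       * \brief Unpack all ticket_state entries from the CIB status section
       *
       * \param[in,out] xml_tickets  XML of the status section's tickets element
       * \param[in,out] scheduler    Scheduler data
       *
       * \return TRUE (failures of individual entries are ignored)
       */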
1065 static gboolean
1066 unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1067 {
1068     xmlNode *xml_obj = NULL;
1069 
1070     for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
1071          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1072 
1073         if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
1074             continue;
1075         }
1076         unpack_ticket_state(xml_obj, scheduler);
1077     }
1078 
1079     return TRUE;
1080 }
1081 
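      /*!
       * \internal
       * \brief Unpack a Pacemaker Remote node's attributes and modes
       *
       * Process a node_state entry for a Pacemaker Remote node: add its
       * transient attributes, and check for shutdown, standby, maintenance, and
       * resource discovery settings.
       *
       * \param[in,out] this_node  Node whose state is being unpacked
       * \param[in]     state      CIB node_state XML for the node
       * \param[in,out] scheduler  Scheduler data
       */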
1082 static void
1083 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1084                            pcmk_scheduler_t *scheduler)
1085 {
1086     const char *discovery = NULL;
1087     const xmlNode *attrs = NULL;
1088     pcmk_resource_t *rsc = NULL;
1089 
1090     if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1091         return;
1092     }
1093 
1094     if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1095         return;
1096     }
1097     crm_trace("Processing Pacemaker Remote node %s",
1098               pcmk__node_name(this_node));
1099 
1100     pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
1101                        &(this_node->details->remote_maintenance), 0);
1102 
1103     rsc = this_node->details->remote_rsc;
1104     if (this_node->details->remote_requires_reset == FALSE) {
1105         this_node->details->unclean = FALSE;
1106         this_node->details->unseen = FALSE;
1107     }
1108     attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1109                                  NULL);
1110     add_node_attrs(attrs, this_node, TRUE, scheduler);
1111 
1112     if (pe__shutdown_requested(this_node)) {
1113         crm_info("%s is shutting down", pcmk__node_name(this_node));
1114         this_node->details->shutdown = TRUE;
1115     }
1116 
1117     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1118                                     pcmk__rsc_node_current))) {
1119         crm_info("%s is in standby mode", pcmk__node_name(this_node));
1120         this_node->details->standby = TRUE;
1121     }
1122 
1123     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1124                                     pcmk__rsc_node_current))
1125         || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1126         crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1127         this_node->details->maintenance = TRUE;
1128     }
1129 
1130     discovery = pcmk__node_attr(this_node,
1131                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1132                                 NULL, pcmk__rsc_node_current);
1133     if ((discovery != NULL) && !crm_is_true(discovery)) {
1134         pcmk__warn_once(pcmk__wo_rdisc_enabled,
1135                         "Support for the "
1136                         PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1137                         " node attribute is deprecated and will be removed"
1138                         " (and behave as 'true') in a future release.");
1139 
1140         if (pcmk__is_remote_node(this_node)
1141             && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1142             pcmk__config_warn("Ignoring "
1143                               PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1144                               " attribute on Pacemaker Remote node %s"
1145                               " because fencing is disabled",
1146                               pcmk__node_name(this_node));
1147         } else {
1148             /* This is either a remote node with fencing enabled, or a guest
1149              * node. We don't care whether fencing is enabled when fencing guest
1150              * nodes, because they are "fenced" by recovering their containing
1151              * resource.
1152              */
1153             crm_info("%s has resource discovery disabled",
1154                      pcmk__node_name(this_node));
1155             this_node->details->rsc_discovery_enabled = FALSE;
1156         }
1157     }
1158 }
1159 
1160 /*!
1161  * \internal
1162  * \brief Unpack a cluster node's transient attributes
1163  *
1164  * \param[in]     state      CIB node state XML
1165  * \param[in,out] node       Cluster node whose attributes are being unpacked
1166  * \param[in,out] scheduler  Scheduler data
1167  */
1168 static void
1169 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1170                             pcmk_scheduler_t *scheduler)
1171 {
1172     const char *discovery = NULL;
1173     const xmlNode *attrs = pcmk__xe_first_child(state,
1174                                                 PCMK__XE_TRANSIENT_ATTRIBUTES,
1175                                                 NULL, NULL);
1176 
1177     add_node_attrs(attrs, node, TRUE, scheduler);
1178 
1179     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1180                                     pcmk__rsc_node_current))) {
1181         crm_info("%s is in standby mode", pcmk__node_name(node));
1182         node->details->standby = TRUE;
1183     }
1184 
1185     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1186                                     pcmk__rsc_node_current))) {
1187         crm_info("%s is in maintenance mode", pcmk__node_name(node));
1188         node->details->maintenance = TRUE;
1189     }
1190 
1191     discovery = pcmk__node_attr(node,
1192                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1193                                 NULL, pcmk__rsc_node_current);
1194     if ((discovery != NULL) && !crm_is_true(discovery)) {
1195         pcmk__config_warn("Ignoring "
1196                           PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1197                           " attribute for %s because disabling resource"
1198                           " discovery is not allowed for cluster nodes",
1199                           pcmk__node_name(node));
1200     }
1201 }
1202 
1203 /*!
1204  * \internal
1205  * \brief Unpack a node state entry (first pass)
1206  *
1207  * Unpack one node state entry from status. This unpacks information from the
 1208  * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
 1209  * the resource history inside it. Multiple passes through the status section
 1210  * are needed to fully unpack everything.
1211  *
1212  * \param[in]     state      CIB node state XML
1213  * \param[in,out] scheduler  Scheduler data
1214  */
1215 static void
1216 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1217 {
1218     const char *id = NULL;
1219     const char *uname = NULL;
1220     pcmk_node_t *this_node = NULL;
1221 
1222     id = crm_element_value(state, PCMK_XA_ID);
1223     if (id == NULL) {
1224         pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1225                          PCMK_XA_ID);
1226         crm_log_xml_info(state, "missing-id");
1227         return;
1228     }
1229 
1230     uname = crm_element_value(state, PCMK_XA_UNAME);
1231     if (uname == NULL) {
 1232         /* If a joining peer makes the cluster acquire quorum from Corosync
 1233          * before it has joined the CPG membership of pacemaker-controld, it's
 1234          * possible that the created PCMK__XE_NODE_STATE entry doesn't have a
 1235          * PCMK_XA_UNAME yet. We should recognize the node as `pending` and
 1236          * wait for it to join CPG.
 1237          */
1238         crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1239                   "without " PCMK_XA_UNAME,
1240                   id);
1241     }
1242 
1243     this_node = pe_find_node_any(scheduler->nodes, id, uname);
1244     if (this_node == NULL) {
1245         crm_notice("Ignoring recorded state for removed node with name %s and "
1246                    PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1247         return;
1248     }
1249 
1250     if (pcmk__is_pacemaker_remote_node(this_node)) {
1251         /* We can't determine the online status of Pacemaker Remote nodes until
1252          * after all resource history has been unpacked. In this first pass, we
1253          * do need to mark whether the node has been fenced, as this plays a
1254          * role during unpacking cluster node resource state.
1255          */
1256         pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
1257                            &(this_node->details->remote_was_fenced), 0);
1258         return;
1259     }
1260 
1261     unpack_transient_attributes(state, this_node, scheduler);
1262 
1263     /* Provisionally mark this cluster node as clean. We have at least seen it
1264      * in the current cluster's lifetime.
1265      */
1266     this_node->details->unclean = FALSE;
1267     this_node->details->unseen = FALSE;
1268 
1269     crm_trace("Determining online status of cluster node %s (id %s)",
1270               pcmk__node_name(this_node), id);
1271     determine_online_status(state, this_node, scheduler);
1272 
1273     if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
1274         && this_node->details->online
1275         && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1276         /* Everything else should flow from this automatically
1277          * (at least until the scheduler becomes able to migrate off
1278          * healthy resources)
1279          */
1280         pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1281                       FALSE);
1282     }
1283 }
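
     /* Example (illustrative sketch; IDs and timestamps are hypothetical): a
      * node_state entry handled by this first pass might look like
      *
      *   <node_state id="1" uname="node1" in_ccm="1700000000" crmd="1700000010"
      *               join="member" expected="member">
      *     <transient_attributes id="1"> ... </transient_attributes>
      *     <lrm id="1"> ... </lrm>
      *   </node_state>
      *
      * This pass consumes the node_state attributes and transient attributes;
      * the resource history under <lrm> is left for unpack_node_history().
      */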
1284 
1285 /*!
1286  * \internal
1287  * \brief Unpack nodes' resource history as much as possible
1288  *
1289  * Unpack as many nodes' resource history as possible in one pass through the
1290  * status. We need to process Pacemaker Remote nodes' connections/containers
1291  * before unpacking their history; the connection/container history will be
1292  * in another node's history, so it might take multiple passes to unpack
1293  * everything.
1294  *
1295  * \param[in]     status     CIB XML status section
1296  * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
1297  * \param[in,out] scheduler  Scheduler data
1298  *
1299  * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1300  *         or EAGAIN if more unpacking remains to be done)
1301  */
1302 static int
1303 unpack_node_history(const xmlNode *status, bool fence,
1304                     pcmk_scheduler_t *scheduler)
1305 {
1306     int rc = pcmk_rc_ok;
1307 
1308     // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1309     for (const xmlNode *state = pcmk__xe_first_child(status,
1310                                                      PCMK__XE_NODE_STATE, NULL,
1311                                                      NULL);
1312          state != NULL; state = pcmk__xe_next_same(state)) {
1313 
1314         const char *id = pcmk__xe_id(state);
1315         const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1316         pcmk_node_t *this_node = NULL;
1317 
1318         if ((id == NULL) || (uname == NULL)) {
1319             // Warning already logged in first pass through status section
1320             crm_trace("Not unpacking resource history from malformed "
1321                       PCMK__XE_NODE_STATE " without id and/or uname");
1322             continue;
1323         }
1324 
1325         this_node = pe_find_node_any(scheduler->nodes, id, uname);
1326         if (this_node == NULL) {
1327             // Warning already logged in first pass through status section
1328             crm_trace("Not unpacking resource history for node %s because "
1329                       "no longer in configuration", id);
1330             continue;
1331         }
1332 
1333         if (this_node->details->unpacked) {
1334             crm_trace("Not unpacking resource history for node %s because "
1335                       "already unpacked", id);
1336             continue;
1337         }
1338 
1339         if (fence) {
1340             // We're processing all remaining nodes
1341 
1342         } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1343             /* We can unpack a guest node's history only after we've unpacked
1344              * other resource history to the point that we know that the node's
1345              * connection and containing resource are both up.
1346              */
1347             pcmk_resource_t *rsc = this_node->details->remote_rsc;
1348 
1349             if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1350                 || (rsc->container->role != pcmk_role_started)) {
1351                 crm_trace("Not unpacking resource history for guest node %s "
1352                           "because container and connection are not known to "
1353                           "be up", id);
1354                 continue;
1355             }
1356 
1357         } else if (pcmk__is_remote_node(this_node)) {
1358             /* We can unpack a remote node's history only after we've unpacked
1359              * other resource history to the point that we know that the node's
1360              * connection is up, with the exception of when shutdown locks are
1361              * in use.
1362              */
1363             pcmk_resource_t *rsc = this_node->details->remote_rsc;
1364 
1365             if ((rsc == NULL)
1366                 || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
1367                     && (rsc->role != pcmk_role_started))) {
1368                 crm_trace("Not unpacking resource history for remote node %s "
1369                           "because connection is not known to be up", id);
1370                 continue;
1371             }
1372 
1373         /* If fencing and shutdown locks are disabled and we're not processing
1374          * unseen nodes, then we don't want to unpack offline nodes until online
1375          * nodes have been unpacked. This allows us to number active clone
1376          * instances first.
1377          */
1378         } else if (!pcmk_any_flags_set(scheduler->flags,
1379                                        pcmk_sched_fencing_enabled
1380                                        |pcmk_sched_shutdown_lock)
1381                    && !this_node->details->online) {
1382             crm_trace("Not unpacking resource history for offline "
1383                       "cluster node %s", id);
1384             continue;
1385         }
1386 
1387         if (pcmk__is_pacemaker_remote_node(this_node)) {
1388             determine_remote_online_status(scheduler, this_node);
1389             unpack_handle_remote_attrs(this_node, state, scheduler);
1390         }
1391 
1392         crm_trace("Unpacking resource history for %snode %s",
1393                   (fence? "unseen " : ""), id);
1394 
1395         this_node->details->unpacked = TRUE;
1396         unpack_node_lrm(this_node, state, scheduler);
1397 
1398         rc = EAGAIN; // Other node histories might depend on this one
1399     }
1400     return rc;
1401 }
1402 
1403 /* Remove nodes that are down or stopping, and create positive rsc_to_node
1404  * constraints between resources and the nodes they are running on.
1405  */
1406 gboolean
1407 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1408 {
1409     xmlNode *state = NULL;
1410 
1411     crm_trace("Beginning unpack");
1412 
1413     if (scheduler->tickets == NULL) {
1414         scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
1415     }
1416 
1417     for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1418          state = pcmk__xe_next(state)) {
1419 
1420         if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1421             unpack_tickets_state((xmlNode *) state, scheduler);
1422 
1423         } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1424             unpack_node_state(state, scheduler);
1425         }
1426     }
1427 
1428     while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1429         crm_trace("Another pass through node resource histories is needed");
1430     }
1431 
1432     // Now catch any nodes we didn't see
1433     unpack_node_history(status,
1434                         pcmk_is_set(scheduler->flags,
1435                                     pcmk_sched_fencing_enabled),
1436                         scheduler);
1437 
1438     /* Now that we know where resources are, we can schedule stops of containers
1439      * with failed bundle connections
1440      */
1441     if (scheduler->stop_needed != NULL) {
1442         for (GList *item = scheduler->stop_needed; item; item = item->next) {
1443             pcmk_resource_t *container = item->data;
1444             pcmk_node_t *node = pcmk__current_node(container);
1445 
1446             if (node) {
1447                 stop_action(container, node, FALSE);
1448             }
1449         }
1450         g_list_free(scheduler->stop_needed);
1451         scheduler->stop_needed = NULL;
1452     }
1453 
1454     /* Now that we know status of all Pacemaker Remote connections and nodes,
1455      * we can stop connections for node shutdowns, and check the online status
1456      * of remote/guest nodes that didn't have any node history to unpack.
1457      */
1458     for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1459         pcmk_node_t *this_node = gIter->data;
1460 
1461         if (!pcmk__is_pacemaker_remote_node(this_node)) {
1462             continue;
1463         }
1464         if (this_node->details->shutdown
1465             && (this_node->details->remote_rsc != NULL)) {
1466             pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
1467                               "remote shutdown");
1468         }
1469         if (!this_node->details->unpacked) {
1470             determine_remote_online_status(scheduler, this_node);
1471         }
1472     }
1473 
1474     return TRUE;
1475 }
1476 
1477 /*!
1478  * \internal
1479  * \brief Unpack node's time when it became a member at the cluster layer
1480  *
1481  * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
1482  * \param[in,out] scheduler   Scheduler data
1483  *
1484  * \return Epoch time when node became a cluster member
1485  *         (or scheduler effective time for legacy entries) if a member,
1486  *         0 if not a member, or -1 if no valid information available
1487  */
1488 static long long
1489 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1490 {
1491     const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1492     int member = 0;
1493 
1494     if (member_time == NULL) {
1495         return -1LL;
1496 
1497     } else if (crm_str_to_boolean(member_time, &member) == 1) {
1498         /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1499          * recorded as a boolean for a DC < 2.1.7, or the node is pending
1500          * shutdown and has left the CPG, in which case it was set to 1 to avoid
1501          * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1502          *
1503          * We return the effective time for in_ccm=1 because what's important to
1504          * avoid fencing is that effective time minus this value is less than
1505          * the pending node timeout.
1506          */
1507         return member? (long long) get_effective_time(scheduler) : 0LL;
1508 
1509     } else {
1510         long long when_member = 0LL;
1511 
1512         if ((pcmk__scan_ll(member_time, &when_member,
1513                            0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1514             crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1515                      " in " PCMK__XE_NODE_STATE " entry", member_time);
1516             return -1LL;
1517         }
1518         return when_member;
1519     }
1520 }
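
     /* Worked examples (illustrative values) for the handling above:
      *   in_ccm missing         -> -1  (no valid information)
      *   in_ccm="0" or "false"  ->  0  (not a cluster member)
      *   in_ccm="1" or "true"   ->  scheduler effective time (legacy boolean,
      *                              or pending shutdown as described above)
      *   in_ccm="1700000000"    ->  1700000000 (epoch time of membership)
      *   in_ccm="garbage"       -> -1, with a warning
      */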
1521 
1522 /*!
1523  * \internal
1524  * \brief Unpack node's time when it became online in process group
1525  *
1526  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry
1527  *
1528  * \return Epoch time when node became online in process group (or 0 if not
1529  *         online, or 1 for legacy online entries)
1530  */
1531 static long long
1532 unpack_node_online(const xmlNode *node_state)
1533 {
1534     const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1535 
1536     // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1537     if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1538                      pcmk__str_casei|pcmk__str_null_matches)) {
1539         return 0LL;
1540 
1541     } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1542         return 1LL;
1543 
1544     } else {
1545         long long when_online = 0LL;
1546 
1547         if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1548             || (when_online < 0)) {
1549             crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1550                      PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1551             return 0LL;
1552         }
1553         return when_online;
1554     }
1555 }
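
     /* Worked examples (illustrative values) for the handling above:
      *   crmd missing or "offline" -> 0  (not online in the process group)
      *   crmd="online"             -> 1  (legacy entry)
      *   crmd="1700000010"         -> 1700000010 (epoch time peer came online)
      *   crmd="garbage"            -> 0, with a warning (assumed offline)
      */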
1556 
1557 /*!
1558  * \internal
1559  * \brief Unpack node attribute for user-requested fencing
1560  *
1561  * \param[in] node        Node to check
1562  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry in CIB status
1563  *
1564  * \return \c true if fencing has been requested for \p node, otherwise \c false
1565  */
1566 static bool
1567 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1568 {
1569     long long value = 0LL;
1570     int value_i = 0;
1571     const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1572                                           NULL, pcmk__rsc_node_current);
1573 
1574     // Value may be boolean or an epoch time
1575     if (crm_str_to_boolean(value_s, &value_i) == 1) {
1576         return (value_i != 0);
1577     }
1578     if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
1579         return (value > 0);
1580     }
1581     crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1582              " node attribute for %s", value_s, pcmk__node_name(node));
1583     return false;
1584 }
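
     /* Worked examples (illustrative values) for the handling above:
      *   terminate="true" or "1"  -> fencing requested
      *   terminate="false" or "0" -> not requested
      *   terminate="1700000123"   -> fencing requested (positive epoch time)
      *   terminate="bogus"        -> not requested, with a warning
      */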
1585 
1586 static gboolean
1587 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1588                                    const xmlNode *node_state,
1589                                    pcmk_node_t *this_node)
1590 {
1591     gboolean online = FALSE;
1592     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1593     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1594     long long when_member = unpack_node_member(node_state, scheduler);
1595     long long when_online = unpack_node_online(node_state);
1596 
1597     if (when_member <= 0) {
1598         crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1599                   ((when_member < 0)? "presumed " : ""));
1600 
1601     } else if (when_online > 0) {
1602         if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1603             online = TRUE;
1604         } else {
1605             crm_debug("Node %s is not ready to run resources: %s",
1606                       pcmk__node_name(this_node), join);
1607         }
1608 
1609     } else if (this_node->details->expected_up == FALSE) {
1610         crm_trace("Node %s controller is down: "
1611                   "member@%lld online@%lld join=%s expected=%s",
1612                   pcmk__node_name(this_node), when_member, when_online,
1613                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1614 
1615     } else {
1616         /* mark it unclean */
1617         pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1618         crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1619                  pcmk__node_name(this_node), when_member, when_online,
1620                  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1621     }
1622     return online;
1623 }
1624 
1625 /*!
1626  * \internal
1627  * \brief Check whether a node has taken too long to join controller group
1628  *
1629  * \param[in,out] scheduler    Scheduler data
1630  * \param[in]     node         Node to check
1631  * \param[in]     when_member  Epoch time when node became a cluster member
1632  * \param[in]     when_online  Epoch time when node joined controller group
1633  *
1634  * \return true if node has been pending (on the way up) longer than
1635  *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1636  * \note This will also update the cluster's recheck time if appropriate.
1637  */
1638 static inline bool
1639 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1640                  long long when_member, long long when_online)
1641 {
1642     if ((scheduler->node_pending_timeout > 0)
1643         && (when_member > 0) && (when_online <= 0)) {
1644         // There is a timeout on pending nodes, and node is pending
1645 
1646         time_t timeout = when_member + scheduler->node_pending_timeout;
1647 
1648         if (get_effective_time(node->details->data_set) >= timeout) {
1649             return true; // Node has timed out
1650         }
1651 
1652         // Node is pending, but still has time
1653         pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1654     }
1655     return false;
1656 }
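
     /* Worked example (illustrative numbers): with node_pending_timeout=120s
      * and a node whose when_member=1700000000 but when_online=0 (in the
      * cluster layer but not yet in the controller group), the timeout expires
      * at 1700000120. At effective time 1700000060 the node still has time, so
      * the recheck time is pulled in to 1700000120; at or after 1700000120 this
      * returns true and the caller fences the node for timing out.
      */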
1657 
1658 static bool
1659 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1660                                 const xmlNode *node_state,
1661                                 pcmk_node_t *this_node)
1662 {
1663     bool termination_requested = unpack_node_terminate(this_node, node_state);
1664     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1665     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1666     long long when_member = unpack_node_member(node_state, scheduler);
1667     long long when_online = unpack_node_online(node_state);
1668 
1669 /*
1670   - PCMK__XA_JOIN          ::= member|down|pending|banned
1671   - PCMK_XA_EXPECTED       ::= member|down
1672 
1673   @COMPAT with entries recorded for DCs < 2.1.7
1674   - PCMK__XA_IN_CCM        ::= true|false
1675   - PCMK_XA_CRMD           ::= online|offline
1676 
1677   Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1678   - PCMK__XA_IN_CCM        ::= <timestamp>|0
1679   Since when the node has been a cluster member. A value of 0 means the node
1680   is not a cluster member.
1681 
1682   - PCMK_XA_CRMD           ::= <timestamp>|0
1683   Since when the peer has been online in CPG. A value of 0 means the peer is
1684   offline in CPG.
1685 */
1686 
1687     crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1688               pcmk__node_name(this_node), when_member, when_online,
1689               pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1690               (termination_requested? " (termination requested)" : ""));
1691 
1692     if (this_node->details->shutdown) {
1693         crm_debug("%s is shutting down", pcmk__node_name(this_node));
1694 
1695         /* Slightly different criteria since we can't shut down a dead peer */
1696         return (when_online > 0);
1697     }
1698 
1699     if (when_member < 0) {
1700         pe_fence_node(scheduler, this_node,
1701                       "peer has not been seen by the cluster", FALSE);
1702         return false;
1703     }
1704 
1705     if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1706         pe_fence_node(scheduler, this_node,
1707                       "peer failed Pacemaker membership criteria", FALSE);
1708 
1709     } else if (termination_requested) {
1710         if ((when_member <= 0) && (when_online <= 0)
1711             && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1712             crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1713             return false;
1714         }
1715         pe_fence_node(scheduler, this_node, "fencing was requested", false);
1716 
1717     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1718                             pcmk__str_null_matches)) {
1719 
1720         if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1721             pe_fence_node(scheduler, this_node,
1722                           "peer pending timed out on joining the process group",
1723                           FALSE);
1724 
1725         } else if ((when_member > 0) || (when_online > 0)) {
1726             crm_info("- %s is not ready to run resources",
1727                      pcmk__node_name(this_node));
1728             this_node->details->standby = TRUE;
1729             this_node->details->pending = TRUE;
1730 
1731         } else {
1732             crm_trace("%s is down or still coming up",
1733                       pcmk__node_name(this_node));
1734         }
1735 
1736     } else if (when_member <= 0) {
1737         // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1738         pe_fence_node(scheduler, this_node,
1739                       "peer is no longer part of the cluster", TRUE);
1740 
1741     } else if (when_online <= 0) {
1742         pe_fence_node(scheduler, this_node,
1743                       "peer process is no longer available", FALSE);
1744 
1745         /* Everything is running at this point, now check join state */
1746 
1747     } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1748         crm_info("%s is active", pcmk__node_name(this_node));
1749 
1750     } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1751                                 CRMD_JOINSTATE_DOWN, NULL)) {
1752         crm_info("%s is not ready to run resources",
1753                  pcmk__node_name(this_node));
1754         this_node->details->standby = TRUE;
1755         this_node->details->pending = TRUE;
1756 
1757     } else {
1758         pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1759                       FALSE);
1760     }
1761 
1762     return (when_member > 0);
1763 }
1764 
1765 static void
1766 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1767                                pcmk_node_t *this_node)
1768 {
1769     pcmk_resource_t *rsc = this_node->details->remote_rsc;
1770     pcmk_resource_t *container = NULL;
1771     pcmk_node_t *host = NULL;
1772 
1773     /* If there is a node state entry for a (former) Pacemaker Remote node
1774      * but no resource creating that node, the node's connection resource will
1775      * be NULL. Consider it an offline remote node in that case.
1776      */
1777     if (rsc == NULL) {
1778         this_node->details->online = FALSE;
1779         goto remote_online_done;
1780     }
1781 
1782     container = rsc->container;
1783 
1784     if (container && pcmk__list_of_1(rsc->running_on)) {
1785         host = rsc->running_on->data;
1786     }
1787 
1788     /* If the resource is currently started, mark it online. */
1789     if (rsc->role == pcmk_role_started) {
1790         crm_trace("%s node %s presumed ONLINE because connection resource is started",
1791                   (container? "Guest" : "Remote"), this_node->details->id);
1792         this_node->details->online = TRUE;
1793     }
1794 
1795     /* consider this node shutting down if transitioning start->stop */
1796     if ((rsc->role == pcmk_role_started)
1797         && (rsc->next_role == pcmk_role_stopped)) {
1798 
1799         crm_trace("%s node %s shutting down because connection resource is stopping",
1800                   (container? "Guest" : "Remote"), this_node->details->id);
1801         this_node->details->shutdown = TRUE;
1802     }
1803 
1804     /* Now check all the failure conditions. */
1805     if (container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1806         crm_trace("Guest node %s UNCLEAN because guest resource failed",
1807                   this_node->details->id);
1808         this_node->details->online = FALSE;
1809         this_node->details->remote_requires_reset = TRUE;
1810 
1811     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1812         crm_trace("%s node %s OFFLINE because connection resource failed",
1813                   (container? "Guest" : "Remote"), this_node->details->id);
1814         this_node->details->online = FALSE;
1815 
1816     } else if ((rsc->role == pcmk_role_stopped)
1817                || ((container != NULL)
1818                    && (container->role == pcmk_role_stopped))) {
1819 
1820         crm_trace("%s node %s OFFLINE because its resource is stopped",
1821                   (container? "Guest" : "Remote"), this_node->details->id);
1822         this_node->details->online = FALSE;
1823         this_node->details->remote_requires_reset = FALSE;
1824 
1825     } else if (host && (host->details->online == FALSE)
1826                && host->details->unclean) {
1827         crm_trace("Guest node %s UNCLEAN because host is unclean",
1828                   this_node->details->id);
1829         this_node->details->online = FALSE;
1830         this_node->details->remote_requires_reset = TRUE;
1831     }
1832 
1833 remote_online_done:
1834     crm_trace("Remote node %s online=%s",
1835         this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1836 }
1837 
1838 static void
1839 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1840                         pcmk_scheduler_t *scheduler)
1841 {
1842     gboolean online = FALSE;
1843     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1844 
1845     CRM_CHECK(this_node != NULL, return);
1846 
1847     this_node->details->shutdown = FALSE;
1848     this_node->details->expected_up = FALSE;
1849 
1850     if (pe__shutdown_requested(this_node)) {
1851         this_node->details->shutdown = TRUE;
1852 
1853     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1854         this_node->details->expected_up = TRUE;
1855     }
1856 
1857     if (this_node->details->type == node_ping) {
1858         this_node->details->unclean = FALSE;
1859         online = FALSE;         /* As far as resource management is concerned,
1860                                  * the node is safely offline.
1861                                  * Anyone caught abusing this logic will be shot
1862                                  */
1863 
1864     } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1865         online = determine_online_status_no_fencing(scheduler, node_state,
1866                                                     this_node);
1867 
1868     } else {
1869         online = determine_online_status_fencing(scheduler, node_state,
1870                                                  this_node);
1871     }
1872 
1873     if (online) {
1874         this_node->details->online = TRUE;
1875 
1876     } else {
1877         /* remove node from contention */
1878         this_node->fixed = TRUE; // @COMPAT deprecated and unused
1879         this_node->weight = -PCMK_SCORE_INFINITY;
1880     }
1881 
1882     if (online && this_node->details->shutdown) {
1883         /* don't run resources here */
1884         this_node->fixed = TRUE; // @COMPAT deprecated and unused
1885         this_node->weight = -PCMK_SCORE_INFINITY;
1886     }
1887 
1888     if (this_node->details->type == node_ping) {
1889         crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
1890 
1891     } else if (this_node->details->unclean) {
1892         pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
1893 
1894     } else if (this_node->details->online) {
1895         crm_info("%s is %s", pcmk__node_name(this_node),
1896                  this_node->details->shutdown ? "shutting down" :
1897                  this_node->details->pending ? "pending" :
1898                  this_node->details->standby ? "standby" :
1899                  this_node->details->maintenance ? "maintenance" : "online");
1900 
1901     } else {
1902         crm_trace("%s is offline", pcmk__node_name(this_node));
1903     }
1904 }
1905 
1906 /*!
1907  * \internal
1908  * \brief Find the end of a resource's name, excluding any clone suffix
1909  *
1910  * \param[in] id  Resource ID to check
1911  *
1912  * \return Pointer to last character of resource's base name
1913  */
1914 const char *
1915 pe_base_name_end(const char *id)
1916 {
1917     if (!pcmk__str_empty(id)) {
1918         const char *end = id + strlen(id) - 1;
1919 
1920         for (const char *s = end; s > id; --s) {
1921             switch (*s) {
1922                 case '0':
1923                 case '1':
1924                 case '2':
1925                 case '3':
1926                 case '4':
1927                 case '5':
1928                 case '6':
1929                 case '7':
1930                 case '8':
1931                 case '9':
1932                     break;
1933                 case ':':
1934                     return (s == end)? s : (s - 1);
1935                 default:
1936                     return end;
1937             }
1938         }
1939         return end;
1940     }
1941     return NULL;
1942 }
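
     /* Examples (illustrative) of what the scan above yields:
      *   pe_base_name_end("myclone:10") -> pointer to the final 'e' in "myclone"
      *   pe_base_name_end("myclone")    -> pointer to the final 'e' (no suffix)
      *   pe_base_name_end("rsc1")       -> pointer to '1' (trailing digits with
      *                                     no colon are part of the base name)
      *   pe_base_name_end("")           -> NULL
      */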
1943 
1944 /*!
1945  * \internal
1946  * \brief Get a resource name excluding any clone suffix
1947  *
1948  * \param[in] last_rsc_id  Resource ID to check
1949  *
1950  * \return Pointer to newly allocated string with resource's base name
1951  * \note It is the caller's responsibility to free() the result.
1952  *       This asserts on error, so callers can assume result is not NULL.
1953  */
1954 char *
1955 clone_strip(const char *last_rsc_id)
1956 {
1957     const char *end = pe_base_name_end(last_rsc_id);
1958     char *basename = NULL;
1959 
1960     CRM_ASSERT(end);
1961     basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1962     CRM_ASSERT(basename);
1963     return basename;
1964 }
1965 
1966 /*!
1967  * \internal
1968  * \brief Get the name of the first instance of a cloned resource
1969  *
1970  * \param[in] last_rsc_id  Resource ID to check
1971  *
1972  * \return Pointer to newly allocated string with resource's base name plus :0
1973  * \note It is the caller's responsibility to free() the result.
1974  *       This asserts on error, so callers can assume result is not NULL.
1975  */
1976 char *
1977 clone_zero(const char *last_rsc_id)
1978 {
1979     const char *end = pe_base_name_end(last_rsc_id);
1980     size_t base_name_len = end - last_rsc_id + 1;
1981     char *zero = NULL;
1982 
1983     CRM_ASSERT(end);
1984     zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
1985     memcpy(zero, last_rsc_id, base_name_len);
1986     zero[base_name_len] = ':';
1987     zero[base_name_len + 1] = '0';
1988     return zero;
1989 }
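
     /* Examples (illustrative) for the two helpers above:
      *   clone_strip("myclone:3") -> "myclone"
      *   clone_strip("myclone")   -> "myclone"
      *   clone_zero("myclone:3")  -> "myclone:0"
      *   clone_zero("myclone")    -> "myclone:0"
      * Both return newly allocated strings that the caller must free().
      */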
1990 
1991 static pcmk_resource_t *
1992 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1993                      pcmk_scheduler_t *scheduler)
1994 {
1995     pcmk_resource_t *rsc = NULL;
1996     xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1997 
1998     pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1999     crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
2000     crm_log_xml_debug(xml_rsc, "Orphan resource");
2001 
2002     if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2003         return NULL;
2004     }
2005 
2006     if (xml_contains_remote_node(xml_rsc)) {
2007         pcmk_node_t *node;
2008 
2009         crm_debug("Detected orphaned remote node %s", rsc_id);
2010         node = pcmk_find_node(scheduler, rsc_id);
2011         if (node == NULL) {
2012             node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL,
2013                                   scheduler);
2014         }
2015         link_rsc2remotenode(scheduler, rsc);
2016 
2017         if (node) {
2018             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2019             node->details->shutdown = TRUE;
2020         }
2021     }
2022 
2023     if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2024         /* This orphaned rsc needs to be mapped to a container. */
2025         crm_trace("Detected orphaned container filler %s", rsc_id);
2026         pcmk__set_rsc_flags(rsc, pcmk_rsc_removed_filler);
2027     }
2028     pcmk__set_rsc_flags(rsc, pcmk_rsc_removed);
2029     scheduler->resources = g_list_append(scheduler->resources, rsc);
2030     return rsc;
2031 }
2032 
2033 /*!
2034  * \internal
2035  * \brief Create orphan instance for anonymous clone resource history
2036  *
2037  * \param[in,out] parent     Clone resource that orphan will be added to
2038  * \param[in]     rsc_id     Orphan's resource ID
2039  * \param[in]     node       Where orphan is active (for logging only)
2040  * \param[in,out] scheduler  Scheduler data
2041  *
2042  * \return Newly added orphaned instance of \p parent
2043  */
2044 static pcmk_resource_t *
2045 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2046                         const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2047 {
2048     pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2049 
2050     // find_rsc() because we might be a cloned group
2051     pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
2052                                                pcmk_rsc_match_clone_only);
2053 
2054     pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2055                     top->id, parent->id, rsc_id, pcmk__node_name(node));
2056     return orphan;
2057 }
2058 
2059 /*!
2060  * \internal
2061  * \brief Check a node for an instance of an anonymous clone
2062  *
2063  * Return a child instance of the specified anonymous clone, in order of
2064  * preference: (1) the instance running on the specified node, if any;
2065  * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
2066  * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX
2067  * instances are already active).
2068  *
2069  * \param[in,out] scheduler  Scheduler data
2070  * \param[in]     node       Node on which to check for instance
2071  * \param[in,out] parent     Clone to check
2072  * \param[in]     rsc_id     Name of cloned resource in history (no instance)
2073  */
2074 static pcmk_resource_t *
2075 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2076                      pcmk_resource_t *parent, const char *rsc_id)
2077 {
2078     GList *rIter = NULL;
2079     pcmk_resource_t *rsc = NULL;
2080     pcmk_resource_t *inactive_instance = NULL;
2081     gboolean skip_inactive = FALSE;
2082 
2083     CRM_ASSERT(pcmk__is_anonymous_clone(parent));
2084 
2085     // Check for active (or partially active, for cloned groups) instance
2086     pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2087                     rsc_id, pcmk__node_name(node), parent->id);
2088     for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2089         GList *locations = NULL;
2090         pcmk_resource_t *child = rIter->data;
2091 
2092         /* Check whether this instance is already known to be active or pending
2093          * anywhere, at this stage of unpacking. Because this function is called
2094          * for a resource before the resource's individual operation history
2095          * entries are unpacked, locations will generally not contain the
2096          * desired node.
2097          *
2098          * However, there are three exceptions:
2099          * (1) when child is a cloned group and we have already unpacked the
2100          *     history of another member of the group on the same node;
2101          * (2) when we've already unpacked the history of another numbered
2102          *     instance on the same node (which can happen if
2103          *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2104          * (3) when we re-run calculations on the same scheduler data as part of
2105          *     a simulation.
2106          */
2107         child->fns->location(child, &locations, 2);
2108         if (locations) {
2109             /* We should never associate the same numbered anonymous clone
2110              * instance with multiple nodes, and clone instances can't migrate,
2111              * so there must be only one location, regardless of history.
2112              */
2113             CRM_LOG_ASSERT(locations->next == NULL);
2114 
2115             if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2116                 /* This child instance is active on the requested node, so check
2117                  * for a corresponding configured resource. We use find_rsc()
2118                  * instead of child because child may be a cloned group, and we
2119                  * need the particular member corresponding to rsc_id.
2120                  *
2121                  * If the history entry is orphaned, rsc will be NULL.
2122                  */
2123                 rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2124                                             pcmk_rsc_match_clone_only);
2125                 if (rsc) {
2126                     /* If there are multiple instance history entries for an
2127                      * anonymous clone in a single node's history (which can
2128                      * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2129                      * to false), we want to consider the instances beyond the
2130                      * first as orphans, even if there are inactive instance
2131                      * numbers available.
2132                      */
2133                     if (rsc->running_on) {
2134                         crm_notice("Active (now-)anonymous clone %s has "
2135                                    "multiple (orphan) instance histories on %s",
2136                                    parent->id, pcmk__node_name(node));
2137                         skip_inactive = TRUE;
2138                         rsc = NULL;
2139                     } else {
2140                         pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2141                     }
2142                 }
2143             }
2144             g_list_free(locations);
2145 
2146         } else {
2147             pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2148             if (!skip_inactive && !inactive_instance
2149                 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2150                 // Remember one inactive instance in case we don't find active
2151                 inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2152                                                           pcmk_rsc_match_clone_only);
2153 
2154                 /* ... but don't use it if it was already associated with a
2155                  * pending action on another node
2156                  */
2157                 if ((inactive_instance != NULL) &&
2158                     (inactive_instance->pending_node != NULL) &&
2159                     !pcmk__same_node(inactive_instance->pending_node, node)) {
2160                     inactive_instance = NULL;
2161                 }
2162             }
2163         }
2164     }
2165 
2166     if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2167         pcmk__rsc_trace(parent, "Resource %s, empty slot",
2168                         inactive_instance->id);
2169         rsc = inactive_instance;
2170     }
2171 
2172     /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2173      * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2174      * don't want to consume a valid instance number for unclean nodes. Such
2175      * instances may appear to be active according to the history, but should be
2176      * considered inactive, so we can start an instance elsewhere. Treat such
2177      * instances as orphans.
2178      *
2179      * An exception is instances running on guest nodes -- since guest node
2180      * "fencing" is actually just a resource stop, requires shouldn't apply.
2181      *
2182      * @TODO Ideally, we'd use an inactive instance number if it is not needed
2183      * for any clean instances. However, we don't know that at this point.
2184      */
2185     if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2186         && (!node->details->online || node->details->unclean)
2187         && !pcmk__is_guest_or_bundle_node(node)
2188         && !pe__is_universal_clone(parent, scheduler)) {
2189 
2190         rsc = NULL;
2191     }
2192 
2193     if (rsc == NULL) {
2194         rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2195         pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2196     }
2197     return rsc;
2198 }
2199 
2200 static pcmk_resource_t *
2201 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2202                      const char *rsc_id)
2203 {
2204     pcmk_resource_t *rsc = NULL;
2205     pcmk_resource_t *parent = NULL;
2206 
2207     crm_trace("looking for %s", rsc_id);
2208     rsc = pe_find_resource(scheduler->resources, rsc_id);
2209 
2210     if (rsc == NULL) {
2211         /* If we didn't find the resource by its name in the operation history,
2212          * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2213          * we create a single :0 orphan to match against here.
2214          */
2215         char *clone0_id = clone_zero(rsc_id);
2216         pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2217                                                    clone0_id);
2218 
2219         if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2220             rsc = clone0;
2221             parent = uber_parent(clone0);
2222             crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2223         } else {
2224             crm_trace("%s is not known as %s either (orphan)",
2225                       rsc_id, clone0_id);
2226         }
2227         free(clone0_id);
2228 
2229     } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2230         crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2231                   rsc_id);
2232         return NULL;
2233 
2234     } else {
2235         parent = uber_parent(rsc);
2236     }
2237 
2238     if (pcmk__is_anonymous_clone(parent)) {
2239 
2240         if (pcmk__is_bundled(parent)) {
2241             rsc = pe__find_bundle_replica(parent->parent, node);
2242         } else {
2243             char *base = clone_strip(rsc_id);
2244 
2245             rsc = find_anonymous_clone(scheduler, node, parent, base);
2246             free(base);
2247             CRM_ASSERT(rsc != NULL);
2248         }
2249     }
2250 
2251     if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2252         && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_none)) {
2253 
2254         pcmk__str_update(&rsc->clone_name, rsc_id);
2255         pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2256                         rsc_id, pcmk__node_name(node), rsc->id,
2257                         pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
2258     }
2259     return rsc;
2260 }
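
     /* Example (illustrative resource names): given a history entry with id
      * "galera:1" where "galera" is an anonymous clone, the code above strips
      * the instance suffix and lets find_anonymous_clone() pick whichever
      * "galera" instance fits this node (active here, otherwise an inactive or
      * orphan instance). The chosen instance then records "galera:1" as its
      * clone_name, so later history entries with that key map to the same
      * instance.
      */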
2261 
2262 static pcmk_resource_t *
2263 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2264                         pcmk_scheduler_t *scheduler)
2265 {
2266     pcmk_resource_t *rsc = NULL;
2267     const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2268 
2269     crm_debug("Detected orphan resource %s on %s",
2270               rsc_id, pcmk__node_name(node));
2271     rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2272     if (rsc == NULL) {
2273         return NULL;
2274     }
2275 
2276     if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
2277         pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2278 
2279     } else {
2280         CRM_CHECK(rsc != NULL, return NULL);
2281         pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2282         resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2283                           "__orphan_do_not_run__", scheduler);
2284     }
2285     return rsc;
2286 }
2287 
2288 static void
2289 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2290                   enum action_fail_response on_fail)
2291 {
2292     pcmk_node_t *tmpnode = NULL;
2293     char *reason = NULL;
2294     enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2295 
2296     CRM_ASSERT(rsc);
2297     pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2298                     rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
2299                     pcmk_on_fail_text(on_fail));
2300 
2301     /* process current state */
2302     if (rsc->role != pcmk_role_unknown) {
2303         pcmk_resource_t *iter = rsc;
2304 
2305         while (iter) {
2306             if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2307                 pcmk_node_t *n = pe__copy_node(node);
2308 
2309                 pcmk__rsc_trace(rsc, "%s%s%s known on %s",
2310                                 rsc->id,
2311                                 ((rsc->clone_name == NULL)? "" : " also known as "),
2312                                 ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2313                                 pcmk__node_name(n));
2314                 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2315             }
2316             if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2317                 break;
2318             }
2319             iter = iter->parent;
2320         }
2321     }
2322 
2323     /* If a managed resource is believed to be running, but node is down ... */
2324     if ((rsc->role > pcmk_role_stopped)
2325         && node->details->online == FALSE
2326         && node->details->maintenance == FALSE
2327         && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2328 
2329         gboolean should_fence = FALSE;
2330 
2331         /* If this is a guest node, fence it (regardless of whether fencing is
2332          * enabled, because guest node fencing is done by recovery of the
2333          * container resource rather than by the fencer). Mark the resource
2334          * we're processing as failed. When the guest comes back up, its
2335          * operation history in the CIB will be cleared, freeing the affected
2336          * resource to run again once we are sure we know its state.
2337          */
2338         if (pcmk__is_guest_or_bundle_node(node)) {
2339             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2340             should_fence = TRUE;
2341 
2342         } else if (pcmk_is_set(rsc->cluster->flags,
2343                                pcmk_sched_fencing_enabled)) {
2344             if (pcmk__is_remote_node(node)
2345                 && (node->details->remote_rsc != NULL)
2346                 && !pcmk_is_set(node->details->remote_rsc->flags,
2347                                 pcmk_rsc_failed)) {
2348 
2349                 /* Setting unseen means that fencing of the remote node will
2350                  * occur only if the connection resource is not going to start
2351                  * somewhere. This allows connection resources on a failed
2352                  * cluster node to move to another node without requiring the
2353                  * remote nodes to be fenced as well.
2354                  */
2355                 node->details->unseen = TRUE;
2356                 reason = crm_strdup_printf("%s is active there (fencing will be"
2357                                            " revoked if remote connection can "
2358                                            "be re-established elsewhere)",
2359                                            rsc->id);
2360             }
2361             should_fence = TRUE;
2362         }
2363 
2364         if (should_fence) {
2365             if (reason == NULL) {
2366                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2367             }
2368             pe_fence_node(rsc->cluster, node, reason, FALSE);
2369         }
2370         free(reason);
2371     }
2372 
2373     /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2374     save_on_fail = on_fail;
2375 
2376     if (node->details->unclean) {
2377         /* No extra processing needed
2378          * Also allows resources to be started again after a node is shot
2379          */
2380         on_fail = pcmk_on_fail_ignore;
2381     }
2382 
2383     switch (on_fail) {
2384         case pcmk_on_fail_ignore:
2385             /* nothing to do */
2386             break;
2387 
2388         case pcmk_on_fail_demote:
2389             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2390             demote_action(rsc, node, FALSE);
2391             break;
2392 
2393         case pcmk_on_fail_fence_node:
2394             /* treat it as if it is still running
2395              * but also mark the node as unclean
2396              */
2397             reason = crm_strdup_printf("%s failed there", rsc->id);
2398             pe_fence_node(rsc->cluster, node, reason, FALSE);
2399             free(reason);
2400             break;
2401 
2402         case pcmk_on_fail_standby_node:
2403             node->details->standby = TRUE;
2404             node->details->standby_onfail = TRUE;
2405             break;
2406 
2407         case pcmk_on_fail_block:
2408             /* is_managed == FALSE will prevent any
2409              * actions being sent for the resource
2410              */
2411             pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2412             pcmk__set_rsc_flags(rsc, pcmk_rsc_blocked);
2413             break;
2414 
2415         case pcmk_on_fail_ban:
2416             /* make sure it comes up somewhere else
2417              * or not at all
2418              */
2419             resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2420                               "__action_migration_auto__", rsc->cluster);
2421             break;
2422 
2423         case pcmk_on_fail_stop:
2424             pe__set_next_role(rsc, pcmk_role_stopped,
2425                               PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2426             break;
2427 
2428         case pcmk_on_fail_restart:
2429             if ((rsc->role != pcmk_role_stopped)
2430                 && (rsc->role != pcmk_role_unknown)) {
2431                 pcmk__set_rsc_flags(rsc,
2432                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2433                 stop_action(rsc, node, FALSE);
2434             }
2435             break;
2436 
2437         case pcmk_on_fail_restart_container:
2438             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2439             if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
2440                 /* A bundle's remote connection can run on a different node than
2441                  * the bundle's container. We don't necessarily know where the
2442                  * container is running yet, so remember it and add a stop
2443                  * action for it later.
2444                  */
2445                 rsc->cluster->stop_needed =
2446                     g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2447             } else if (rsc->container) {
2448                 stop_action(rsc->container, node, FALSE);
2449             } else if ((rsc->role != pcmk_role_stopped)
2450                        && (rsc->role != pcmk_role_unknown)) {
2451                 stop_action(rsc, node, FALSE);
2452             }
2453             break;
2454 
2455         case pcmk_on_fail_reset_remote:
2456             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2457             if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
2458                 tmpnode = NULL;
2459                 if (rsc->is_remote_node) {
2460                     tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2461                 }
2462                 if (pcmk__is_remote_node(tmpnode)
2463                     && !(tmpnode->details->remote_was_fenced)) {
2464                     /* The remote connection resource failed in a way that
2465                      * should result in fencing the remote node.
2466                      */
2467                     pe_fence_node(rsc->cluster, tmpnode,
2468                                   "remote connection is unrecoverable", FALSE);
2469                 }
2470             }
2471 
2472             /* Require the stop action regardless of whether fencing is occurring. */
2473             if (rsc->role > pcmk_role_stopped) {
2474                 stop_action(rsc, node, FALSE);
2475             }
2476 
2477             /* if reconnect delay is in use, prevent the connection from exiting the
2478              * "STOPPED" role until the failure is cleared by the delay timeout. */
2479             if (rsc->remote_reconnect_ms) {
2480                 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2481             }
2482             break;
2483     }
2484 
2485     /* Ensure a remote node connection failure forces an unclean remote node
2486      * to be fenced. By setting unseen = FALSE, the remote node failure will
2487      * result in a fencing operation regardless of whether we're going to
2488      * attempt to reconnect to the remote node in this transition. */
2489     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2490         tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2491         if (tmpnode && tmpnode->details->unclean) {
2492             tmpnode->details->unseen = FALSE;
2493         }
2494     }
2495 
2496     if ((rsc->role != pcmk_role_stopped)
2497         && (rsc->role != pcmk_role_unknown)) {
2498         if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2499             if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2500                 crm_notice("Removed resource %s is active on %s and will be "
2501                            "stopped when possible",
2502                            rsc->id, pcmk__node_name(node));
2503             } else {
2504                 crm_notice("Removed resource %s must be stopped manually on %s "
2505                            "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
2506                            " is set to false", rsc->id, pcmk__node_name(node));
2507             }
2508         }
2509 
2510         native_add_running(rsc, node, rsc->cluster,
2511                            (save_on_fail != pcmk_on_fail_ignore));
2512         switch (on_fail) {
2513             case pcmk_on_fail_ignore:
2514                 break;
2515             case pcmk_on_fail_demote:
2516             case pcmk_on_fail_block:
2517                 pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2518                 break;
2519             default:
2520                 pcmk__set_rsc_flags(rsc,
2521                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2522                 break;
2523         }
2524 
2525     } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2526         /* Only do this for older status sections that included instance numbers.
2527          * Otherwise, stopped instances will appear as orphans.
2528          */
2529         pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
2530                         rsc->clone_name, rsc->id);
2531         free(rsc->clone_name);
2532         rsc->clone_name = NULL;
2533 
2534     } else {
2535         GList *possible_matches = pe__resource_actions(rsc, node,
2536                                                        PCMK_ACTION_STOP, FALSE);
2537         GList *gIter = possible_matches;
2538 
2539         for (; gIter != NULL; gIter = gIter->next) {
2540             pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2541 
2542             pcmk__set_action_flags(stop, pcmk_action_optional);
2543         }
2544 
2545         g_list_free(possible_matches);
2546     }
2547 
2548     /* A successful stop after migrate_to on the migration source doesn't make
2549      * the partially migrated resource stopped on the migration target.
2550      */
2551     if ((rsc->role == pcmk_role_stopped)
2552         && rsc->partial_migration_source
2553         && rsc->partial_migration_source->details == node->details
2554         && rsc->partial_migration_target
2555         && rsc->running_on) {
2556 
2557         rsc->role = pcmk_role_started;
2558     }
2559 }
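     /* Recapping the tail of process_rsc_state() above: after the on-fail
      * switch, a failed remote connection clears the node's "unseen" flag so an
      * unclean remote node is still fenced; a resource that is still active on
      * the node is re-added to its running list with failure flags set per
      * on_fail; a stopped instance whose clone_name carries an old-style
      * instance number has clone_name reset; otherwise any stop actions already
      * created for this node become optional; and a successful stop on a
      * partial migration's source does not mark the resource stopped, since it
      * may still be active on the target.
      */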
2560 
2561 /* create active recurring operations as optional */
2562 static void
2563 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2564                   int start_index, int stop_index,
2565                   GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2566 {
2567     int counter = -1;
2568     const char *task = NULL;
2569     const char *status = NULL;
2570     GList *gIter = sorted_op_list;
2571 
2572     CRM_ASSERT(rsc);
2573     pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2574                     rsc->id, start_index, stop_index);
2575 
2576     for (; gIter != NULL; gIter = gIter->next) {
2577         xmlNode *rsc_op = (xmlNode *) gIter->data;
2578 
2579         guint interval_ms = 0;
2580         char *key = NULL;
2581         const char *id = pcmk__xe_id(rsc_op);
2582 
2583         counter++;
2584 
2585         if (node->details->online == FALSE) {
2586             pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2587                             rsc->id, pcmk__node_name(node));
2588             break;
2589 
2590             /* Need to check if there's a monitor for role="Stopped" */
2591         } else if (start_index < stop_index && counter <= stop_index) {
2592             pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2593                             id, pcmk__node_name(node));
2594             continue;
2595 
2596         } else if (counter < start_index) {
2597             pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2598                             id, pcmk__node_name(node), counter);
2599             continue;
2600         }
2601 
2602         crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2603         if (interval_ms == 0) {
2604             pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2605                             id, pcmk__node_name(node));
2606             continue;
2607         }
2608 
2609         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2610         if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2611             pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2612                             id, pcmk__node_name(node));
2613             continue;
2614         }
2615         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2616         /* create the action */
2617         key = pcmk__op_key(rsc->id, task, interval_ms);
2618         pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2619         custom_action(rsc, key, task, node, TRUE, scheduler);
2620     }
2621 }
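     /* The key passed to custom_action() above follows the usual operation key
      * convention of <resource>_<action>_<interval-ms>; for example, a
      * 10-second monitor of a resource "myrsc" would presumably be keyed
      * "myrsc_monitor_10000" (the exact format comes from pcmk__op_key()). The
      * TRUE argument to custom_action() marks the recreated recurring action as
      * optional, matching the "create active recurring operations as optional"
      * comment above.
      */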
2622 
2623 void
2624 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2625                      int *stop_index)
2626 {
2627     int counter = -1;
2628     int implied_monitor_start = -1;
2629     int implied_clone_start = -1;
2630     const char *task = NULL;
2631     const char *status = NULL;
2632 
2633     *stop_index = -1;
2634     *start_index = -1;
2635 
2636     for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2637         const xmlNode *rsc_op = (const xmlNode *) iter->data;
2638 
2639         counter++;
2640 
2641         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2642         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2643 
2644         if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2645             && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2646             *stop_index = counter;
2647 
2648         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2649                                         PCMK_ACTION_MIGRATE_FROM, NULL)) {
2650             *start_index = counter;
2651 
2652         } else if ((implied_monitor_start <= *stop_index)
2653                    && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2654                                    pcmk__str_casei)) {
2655             const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2656 
2657             if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2658                 implied_monitor_start = counter;
2659             }
2660         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2661                                         PCMK_ACTION_DEMOTE, NULL)) {
2662             implied_clone_start = counter;
2663         }
2664     }
2665 
2666     if (*start_index == -1) {
2667         if (implied_clone_start != -1) {
2668             *start_index = implied_clone_start;
2669         } else if (implied_monitor_start != -1) {
2670             *start_index = implied_monitor_start;
2671         }
2672     }
2673 }
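     /* A worked example (hypothetical history, sorted by call ID):
      *
      *   index 0: start    (rc=0)
      *   index 1: monitor  (rc=0, interval=10s)
      *   index 2: stop     (rc=0)
      *   index 3: start    (rc=0)
      *   index 4: monitor  (rc=0, interval=10s)
      *
      * yields stop_index=2 and start_index=3: the most recent successful stop
      * precedes the most recent start, so the resource is considered active,
      * and process_recurring() will recreate only the recurring monitor at
      * index 4 (earlier entries are skipped as old, and the start at index 3
      * is skipped as non-recurring).
      */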
2674 
2675 // If resource history entry has shutdown lock, remember lock node and time
2676 static void
2677 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2678                      const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2679 {
2680     time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
2681 
2682     if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
2683                                  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2684 
2685         if ((scheduler->shutdown_lock > 0)
2686             && (get_effective_time(scheduler)
2687                 > (lock_time + scheduler->shutdown_lock))) {
2688             pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2689                            rsc->id, pcmk__node_name(node));
2690             pe__clear_resource_history(rsc, node);
2691         } else {
2692             /* @COMPAT I don't like breaking const signatures, but
2693              * rsc->lock_node should really be const -- we just can't change it
2694              * until the next API compatibility break.
2695              */
2696             rsc->lock_node = (pcmk_node_t *) node;
2697             rsc->lock_time = lock_time;
2698         }
2699     }
2700 }
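     /* In other words, a shutdown lock recorded in the resource history is
      * honored only while it is still within the configured lock limit
      * (scheduler->shutdown_lock, presumably populated from the
      * shutdown-lock-limit cluster option); once the effective time passes
      * lock_time + shutdown_lock, the lock is treated as expired and the locked
      * history is cleared.
      */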
2701 
2702 /*!
2703  * \internal
2704  * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2705  *
2706  * \param[in,out] node       Node whose status is being unpacked
2707  * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2708  * \param[in,out] scheduler  Scheduler data
2709  *
2710  * \return Resource corresponding to the entry, or NULL if no operation history
2711  */
2712 static pcmk_resource_t *
2713 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2714                     pcmk_scheduler_t *scheduler)
2715 {
2716     GList *gIter = NULL;
2717     int stop_index = -1;
2718     int start_index = -1;
2719     enum rsc_role_e req_role = pcmk_role_unknown;
2720 
2721     const char *rsc_id = pcmk__xe_id(lrm_resource);
2722 
2723     pcmk_resource_t *rsc = NULL;
2724     GList *op_list = NULL;
2725     GList *sorted_op_list = NULL;
2726 
2727     xmlNode *rsc_op = NULL;
2728     xmlNode *last_failure = NULL;
2729 
2730     enum action_fail_response on_fail = pcmk_on_fail_ignore;
2731     enum rsc_role_e saved_role = pcmk_role_unknown;
2732 
2733     if (rsc_id == NULL) {
2734         pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2735                          " entry: No " PCMK_XA_ID);
2736         crm_log_xml_info(lrm_resource, "missing-id");
2737         return NULL;
2738     }
2739     crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2740               rsc_id, pcmk__node_name(node));
2741 
2742     /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2743      * them
2744      */
2745     for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2746                                        NULL);
2747          rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
2748 
2749         op_list = g_list_prepend(op_list, rsc_op);
2750     }
2751 
2752     if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2753         if (op_list == NULL) {
2754             // If there are no operations, there is nothing to do
2755             return NULL;
2756         }
2757     }
2758 
2759     /* find the resource */
2760     rsc = unpack_find_resource(scheduler, node, rsc_id);
2761     if (rsc == NULL) {
2762         if (op_list == NULL) {
2763             // If there are no operations, there is nothing to do
2764             return NULL;
2765         } else {
2766             rsc = process_orphan_resource(lrm_resource, node, scheduler);
2767         }
2768     }
2769     CRM_ASSERT(rsc != NULL);
2770 
2771     // Check whether the resource is "shutdown-locked" to this node
2772     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2773         unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2774     }
2775 
2776     /* process operations */
2777     saved_role = rsc->role;
2778     rsc->role = pcmk_role_unknown;
2779     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2780 
2781     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2782         xmlNode *rsc_op = (xmlNode *) gIter->data;
2783 
2784         unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2785     }
2786 
2787     /* create active recurring operations as optional */
2788     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2789     process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2790                       scheduler);
2791 
2792     /* no need to free the contents */
2793     g_list_free(sorted_op_list);
2794 
2795     process_rsc_state(rsc, node, on_fail);
2796 
2797     if (get_target_role(rsc, &req_role)) {
2798         if ((rsc->next_role == pcmk_role_unknown)
2799             || (req_role < rsc->next_role)) {
2800 
2801             pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2802 
2803         } else if (req_role > rsc->next_role) {
2804             pcmk__rsc_info(rsc,
2805                            "%s: Not overwriting calculated next role %s"
2806                            " with requested next role %s",
2807                            rsc->id, pcmk_role_text(rsc->next_role),
2808                            pcmk_role_text(req_role));
2809         }
2810     }
2811 
2812     if (saved_role > rsc->role) {
2813         rsc->role = saved_role;
2814     }
2815 
2816     return rsc;
2817 }
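     /* The overall flow of unpack_lrm_resource() above: collect the entry's
      * lrm_rsc_op children, find the matching resource (or create an orphan for
      * it), apply any shutdown lock, sort the operations by call ID and unpack
      * each with unpack_rsc_op(), recreate active recurring operations via
      * calculate_active_ops() and process_recurring(), let process_rsc_state()
      * apply the accumulated on-fail handling, and finally reconcile the
      * calculated next role with any configured target-role.
      */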
2818 
2819 static void
2820 handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2821                                   pcmk_scheduler_t *scheduler)
2822 {
2823     for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
2824                                                          NULL, NULL);
2825          rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2826 
2827         pcmk_resource_t *rsc;
2828         pcmk_resource_t *container;
2829         const char *rsc_id;
2830         const char *container_id;
2831 
2832         if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2833             continue;
2834         }
2835 
2836         container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2837         rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2838         if (container_id == NULL || rsc_id == NULL) {
2839             continue;
2840         }
2841 
2842         container = pe_find_resource(scheduler->resources, container_id);
2843         if (container == NULL) {
2844             continue;
2845         }
2846 
2847         rsc = pe_find_resource(scheduler->resources, rsc_id);
2848         if ((rsc == NULL) || (rsc->container != NULL)
2849             || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2850             continue;
2851         }
2852 
2853         pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2854                         rsc->id, container_id);
2855         rsc->container = container;
2856         container->fillers = g_list_append(container->fillers, rsc);
2857     }
2858 }
2859 
2860 /*!
2861  * \internal
2862  * \brief Unpack one node's lrm status section
2863  *
2864  * \param[in,out] node       Node whose status is being unpacked
2865  * \param[in]     xml        CIB node state XML
2866  * \param[in,out] scheduler  Scheduler data
2867  */
2868 static void
2869 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2870                 pcmk_scheduler_t *scheduler)
2871 {
2872     bool found_orphaned_container_filler = false;
2873 
2874     // Drill down to PCMK__XE_LRM_RESOURCES section
2875     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2876     if (xml == NULL) {
2877         return;
2878     }
2879     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2880     if (xml == NULL) {
2881         return;
2882     }
2883 
2884     // Unpack each PCMK__XE_LRM_RESOURCE entry
2885     for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2886                                                          PCMK__XE_LRM_RESOURCE,
2887                                                          NULL, NULL);
2888          rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
2889 
2890         pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2891 
2892         if ((rsc != NULL)
2893             && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2894             found_orphaned_container_filler = true;
2895         }
2896     }
2897 
2898     /* Now that all resource state has been unpacked for this node, map any
2899      * orphaned container fillers to their container resource.
2900      */
2901     if (found_orphaned_container_filler) {
2902         handle_orphaned_container_fillers(xml, scheduler);
2903     }
2904 }
2905 
2906 static void
2907 set_active(pcmk_resource_t *rsc)
2908 {
2909     const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2910 
2911     if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2912         rsc->role = pcmk_role_unpromoted;
2913     } else {
2914         rsc->role = pcmk_role_started;
2915     }
2916 }
2917 
2918 static void
2919 set_node_score(gpointer key, gpointer value, gpointer user_data)
2920 {
2921     pcmk_node_t *node = value;
2922     int *score = user_data;
2923 
2924     node->weight = *score;
2925 }
2926 
2927 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2928                          "/" PCMK__XE_NODE_STATE
2929 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM             \
2930                                "/" PCMK__XE_LRM_RESOURCES   \
2931                                "/" PCMK__XE_LRM_RESOURCE
2932 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2933 
2934 static xmlNode *
2935 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2936             int target_rc, pcmk_scheduler_t *scheduler)
2937 {
2938     GString *xpath = NULL;
2939     xmlNode *xml = NULL;
2940 
2941     CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2942               return NULL);
2943 
2944     xpath = g_string_sized_new(256);
2945     pcmk__g_strcat(xpath,
2946                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2947                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2948                    SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2949                    NULL);
2950 
2951     /* Need to check against transition_magic too? */
2952     if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2953         pcmk__g_strcat(xpath,
2954                        " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2955                        NULL);
2956 
2957     } else if ((source != NULL)
2958                && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2959         pcmk__g_strcat(xpath,
2960                        " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2961                        NULL);
2962     } else {
2963         g_string_append_c(xpath, ']');
2964     }
2965 
2966     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2967                            LOG_DEBUG);
2968     g_string_free(xpath, TRUE);
2969 
2970     if (xml && target_rc >= 0) {
2971         int rc = PCMK_OCF_UNKNOWN_ERROR;
2972         int status = PCMK_EXEC_ERROR;
2973 
2974         crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
2975         crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
2976         if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2977             return NULL;
2978         }
2979     }
2980     return xml;
2981 }
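     /* A rough sketch of what the XPath above expands to (the literal element
      * and attribute names are assumptions based on the macro names): a call
      * like
      *
      *   find_lrm_op("rsc1", PCMK_ACTION_MONITOR, "node2", NULL,
      *               PCMK_OCF_NOT_RUNNING, scheduler);
      *
      * would search for something along the lines of
      *
      *   /cib/status/node_state[@uname='node2']/lrm/lrm_resources
      *       /lrm_resource[@id='rsc1']/lrm_rsc_op[@operation='monitor']
      *
      * and return the match only if its recorded rc-code and op-status show a
      * completed "not running" result.
      */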
2982 
2983 static xmlNode *
2984 find_lrm_resource(const char *rsc_id, const char *node_name,
2985                   pcmk_scheduler_t *scheduler)
2986 {
2987     GString *xpath = NULL;
2988     xmlNode *xml = NULL;
2989 
2990     CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2991 
2992     xpath = g_string_sized_new(256);
2993     pcmk__g_strcat(xpath,
2994                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
2995                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
2996                    NULL);
2997 
2998     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2999                            LOG_DEBUG);
3000 
3001     g_string_free(xpath, TRUE);
3002     return xml;
3003 }
3004 
3005 /*!
3006  * \internal
3007  * \brief Check whether a resource has no completed action history on a node
3008  *
3009  * \param[in,out] rsc        Resource to check
3010  * \param[in]     node_name  Node to check
3011  *
3012  * \return true if \p rsc is unknown on \p node_name, otherwise false
3013  */
3014 static bool
3015 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3016 {
3017     bool result = false;
3018     xmlXPathObjectPtr search;
3019     char *xpath = NULL;
3020 
3021     xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3022                               SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
3023                               SUB_XPATH_LRM_RSC_OP
3024                               "[@" PCMK__XA_RC_CODE "!='%d']",
3025                               node_name, rsc->id, PCMK_OCF_UNKNOWN);
3026 
3027     search = xpath_search(rsc->cluster->input, xpath);
3028     result = (numXpathResults(search) == 0);
3029     freeXpathObject(search);
3030     free(xpath);
3031     return result;
3032 }
3033 
3034 /*!
3035  * \brief Check whether a probe/monitor indicating the resource was not running
3036  * on a node happened after some event
3037  *
3038  * \param[in]     rsc_id     Resource being checked
3039  * \param[in]     node_name  Node being checked
3040  * \param[in]     xml_op     Event that monitor is being compared to
3041  * \param[in]     same_node  Whether the operations are on the same node
3042  * \param[in,out] scheduler  Scheduler data
3043  *
3044  * \return true if such a monitor happened after the event, false otherwise
3045  */
3046 static bool
3047 monitor_not_running_after(const char *rsc_id, const char *node_name,
3048                           const xmlNode *xml_op, bool same_node,
3049                           pcmk_scheduler_t *scheduler)
3050 {
3051     /* Any probe/monitor operation on the node indicating it was not running
3052      * there
3053      */
3054     xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3055                                    NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3056 
3057     return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
3058 }
3059 
3060 /*!
3061  * \brief Check whether any non-monitor operation on a node happened after some
3062  * event
3063  *
3064  * \param[in]     rsc_id     Resource being checked
3065  * \param[in]     node_name  Node being checked
3066  * \param[in]     xml_op     Event that non-monitor is being compared to
3067  * \param[in]     same_node  Whether the operations are on the same node
3068  * \param[in,out] scheduler  Scheduler data
3069  *
3070  * \return true if such an operation happened after the event, false otherwise
3071  */
3072 static bool
3073 non_monitor_after(const char *rsc_id, const char *node_name,
3074                   const xmlNode *xml_op, bool same_node,
3075                   pcmk_scheduler_t *scheduler)
3076 {
3077     xmlNode *lrm_resource = NULL;
3078 
3079     lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3080     if (lrm_resource == NULL) {
3081         return false;
3082     }
3083 
3084     for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3085                                             NULL, NULL);
3086          op != NULL; op = pcmk__xe_next_same(op)) {
3087 
3088         const char * task = NULL;
3089 
3090         if (op == xml_op) {
3091             continue;
3092         }
3093 
3094         task = crm_element_value(op, PCMK_XA_OPERATION);
3095 
3096         if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3097                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3098                              NULL)
3099             && pe__is_newer_op(op, xml_op, same_node) > 0) {
3100             return true;
3101         }
3102     }
3103 
3104     return false;
3105 }
3106 
3107 /*!
3108  * \brief Check whether the resource has newer state on a node after a migration
3109  * attempt
3110  *
3111  * \param[in]     rsc_id        Resource being checked
3112  * \param[in]     node_name     Node being checked
3113  * \param[in]     migrate_to    Any migrate_to event that is being compared to
3114  * \param[in]     migrate_from  Any migrate_from event that is being compared to
3115  * \param[in,out] scheduler     Scheduler data
3116  *
3117  * \return true if the resource has newer state on the node, false otherwise
3118  */
3119 static bool
3120 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3121                           const xmlNode *migrate_to,
3122                           const xmlNode *migrate_from,
3123                           pcmk_scheduler_t *scheduler)
3124 {
3125     const xmlNode *xml_op = migrate_to;
3126     const char *source = NULL;
3127     const char *target = NULL;
3128     bool same_node = false;
3129 
3130     if (migrate_from) {
3131         xml_op = migrate_from;
3132     }
3133 
3134     source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
3135     target = crm_element_value(xml_op, PCMK__META_MIGRATE_TARGET);
3136 
3137     /* Prefer to compare against the migration event on the same node, if one
3138      * exists, since call IDs are more reliable.
3139      */
3140     if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3141         if (migrate_from) {
3142            xml_op = migrate_from;
3143            same_node = true;
3144 
3145         } else {
3146            xml_op = migrate_to;
3147         }
3148 
3149     } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3150         if (migrate_to) {
3151            xml_op = migrate_to;
3152            same_node = true;
3153 
3154         } else {
3155            xml_op = migrate_from;
3156         }
3157     }
3158 
3159     /* If there's any newer non-monitor operation on the node, or any newer
3160      * probe/monitor operation on the node indicating it was not running there,
3161      * the migration events potentially no longer matter for the node.
3162      */
3163     return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3164            || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3165                                         scheduler);
3166 }
3167 
3168 /*!
3169  * \internal
3170  * \brief Parse migration source and target node names from history entry
3171  *
3172  * \param[in]  entry        Resource history entry for a migration action
3173  * \param[in]  source_node  If not NULL, source must match this node
3174  * \param[in]  target_node  If not NULL, target must match this node
3175  * \param[out] source_name  Where to store migration source node name
3176  * \param[out] target_name  Where to store migration target node name
3177  *
3178  * \return Standard Pacemaker return code
3179  */
3180 static int
3181 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3182                          const pcmk_node_t *target_node,
3183                          const char **source_name, const char **target_name)
3184 {
3185     *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3186     *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3187     if ((*source_name == NULL) || (*target_name == NULL)) {
3188         pcmk__config_err("Ignoring resource history entry %s without "
3189                          PCMK__META_MIGRATE_SOURCE " and "
3190                          PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3191         return pcmk_rc_unpack_error;
3192     }
3193 
3194     if ((source_node != NULL)
3195         && !pcmk__str_eq(*source_name, source_node->details->uname,
3196                          pcmk__str_casei|pcmk__str_null_matches)) {
3197         pcmk__config_err("Ignoring resource history entry %s because "
3198                          PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3199                          pcmk__xe_id(entry), *source_name,
3200                          pcmk__node_name(source_node));
3201         return pcmk_rc_unpack_error;
3202     }
3203 
3204     if ((target_node != NULL)
3205         && !pcmk__str_eq(*target_name, target_node->details->uname,
3206                          pcmk__str_casei|pcmk__str_null_matches)) {
3207         pcmk__config_err("Ignoring resource history entry %s because "
3208                          PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3209                          pcmk__xe_id(entry), *target_name,
3210                          pcmk__node_name(target_node));
3211         return pcmk_rc_unpack_error;
3212     }
3213 
3214     return pcmk_rc_ok;
3215 }
3216 
3217 /*!
3218  * \internal
3219  * \brief Add a migration source to a resource's list of dangling migrations
3220  *
3221  * If the migrate_to and migrate_from actions in a live migration both
3222  * succeeded, but there is no stop on the source, the migration is considered
3223  * "dangling." Add the source to the resource's dangling migration list, which
3224  * will be used to schedule a stop on the source without affecting the target.
3225  *
3226  * \param[in,out] rsc   Resource involved in migration
3227  * \param[in]     node  Migration source
3228  */
3229 static void
3230 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3231 {
3232     pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3233                     rsc->id, pcmk__node_name(node));
3234     rsc->role = pcmk_role_stopped;
3235     rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3236                                               (gpointer) node);
3237 }
3238 
3239 /*!
3240  * \internal
3241  * \brief Update resource role etc. after a successful migrate_to action
3242  *
3243  * \param[in,out] history  Parsed action result history
3244  */
3245 static void
3246 unpack_migrate_to_success(struct action_history *history)
3247 {
3248     /* A complete migration sequence is:
3249      * 1. migrate_to on source node (which succeeded if we get to this function)
3250      * 2. migrate_from on target node
3251      * 3. stop on source node
3252      *
3253      * If no migrate_from has happened, the migration is considered to be
3254      * "partial". If the migrate_from succeeded but no stop has happened, the
3255      * migration is considered to be "dangling".
3256      *
3257      * If a successful migrate_to and stop have happened on the source node, we
3258      * still need to check for a partial migration, due to scenarios (easier to
3259      * produce with batch-limit=1) like:
3260      *
3261      * - A resource is migrating from node1 to node2, and a migrate_to is
3262      *   initiated for it on node1.
3263      *
3264      * - node2 goes into standby mode while the migrate_to is pending, which
3265      *   aborts the transition.
3266      *
3267      * - Upon completion of the migrate_to, a new transition schedules a stop
3268      *   on both nodes and a start on node1.
3269      *
3270      * - If the new transition is aborted for any reason while the resource is
3271      *   stopping on node1, the transition after that stop completes will see
3272      *   the migrate_to and stop on the source, but it's still a partial
3273      *   migration, and the resource must be stopped on node2 because it is
3274      *   potentially active there due to the migrate_to.
3275      *
3276      *   We also need to take into account that either node's history may be
3277      *   cleared at any point in the migration process.
3278      */
3279     int from_rc = PCMK_OCF_OK;
3280     int from_status = PCMK_EXEC_PENDING;
3281     pcmk_node_t *target_node = NULL;
3282     xmlNode *migrate_from = NULL;
3283     const char *source = NULL;
3284     const char *target = NULL;
3285     bool source_newer_op = false;
3286     bool target_newer_state = false;
3287     bool active_on_target = false;
3288 
3289     // Get source and target node names from XML
3290     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3291                                  &target) != pcmk_rc_ok) {
3292         return;
3293     }
3294 
3295     // Check for newer state on the source
3296     source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3297                                         true, history->rsc->cluster);
3298 
3299     // Check for a migrate_from action from this source on the target
3300     migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3301                                target, source, -1, history->rsc->cluster);
3302     if (migrate_from != NULL) {
3303         if (source_newer_op) {
3304             /* There's a newer non-monitor operation on the source and a
3305              * migrate_from on the target, so this migrate_to is irrelevant to
3306              * the resource's state.
3307              */
3308             return;
3309         }
3310         crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3311         crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3312     }
3313 
3314     /* If the resource has newer state on both the source and target after the
3315      * migration events, this migrate_to is irrelevant to the resource's state.
3316      */
3317     target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3318                                                    history->xml, migrate_from,
3319                                                    history->rsc->cluster);
3320     if (source_newer_op && target_newer_state) {
3321         return;
3322     }
3323 
3324     /* Check for dangling migration (migrate_from succeeded but stop not done).
3325      * We know there's no stop because we already returned if the target has a
3326      * migrate_from and the source has any newer non-monitor operation.
3327      */
3328     if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3329         add_dangling_migration(history->rsc, history->node);
3330         return;
3331     }
3332 
3333     /* Without newer state, this migrate_to implies the resource is active.
3334      * (Clones are not allowed to migrate, so role can't be promoted.)
3335      */
3336     history->rsc->role = pcmk_role_started;
3337 
3338     target_node = pcmk_find_node(history->rsc->cluster, target);
3339     active_on_target = !target_newer_state && (target_node != NULL)
3340                        && target_node->details->online;
3341 
3342     if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3343         if (active_on_target) {
3344             native_add_running(history->rsc, target_node, history->rsc->cluster,
3345                                TRUE);
3346         } else {
3347             // Mark resource as failed, require recovery, and prevent migration
3348             pcmk__set_rsc_flags(history->rsc,
3349                                 pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3350             pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
3351         }
3352         return;
3353     }
3354 
3355     // The migrate_from is pending, complete but erased, or to be scheduled
3356 
3357     /* If there is no history at all for the resource on an online target, then
3358      * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3359      * have the probe result, it will be reflected in target_newer_state.
3360      */
3361     if ((target_node != NULL) && target_node->details->online
3362         && unknown_on_node(history->rsc, target)) {
3363         return;
3364     }
3365 
3366     if (active_on_target) {
3367         pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3368                                                   source);
3369 
3370         native_add_running(history->rsc, target_node, history->rsc->cluster,
3371                            FALSE);
3372         if ((source_node != NULL) && source_node->details->online) {
3373             /* This is a partial migration: the migrate_to completed
3374              * successfully on the source, but the migrate_from has not
3375              * completed. Remember the source and target; if the newly
3376              * chosen target remains the same when we schedule actions
3377              * later, we may continue with the migration.
3378              */
3379             history->rsc->partial_migration_target = target_node;
3380             history->rsc->partial_migration_source = source_node;
3381         }
3382 
3383     } else if (!source_newer_op) {
3384         // Mark resource as failed, require recovery, and prevent migration
3385         pcmk__set_rsc_flags(history->rsc,
3386                             pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3387         pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
3388     }
3389 }
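     /* In short, the possible outcomes handled above for a successful
      * migrate_to are:
      *  - newer state on both source and target: the entry is ignored
      *  - migrate_from succeeded but no stop yet: dangling migration (a stop
      *    will be scheduled on the source)
      *  - migrate_from failed: the resource is marked failed and no longer
      *    migratable
      *  - migrate_from pending (or history erased): possibly a partial
      *    migration, remembering source and target so it may be continued
      */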
3390 
3391 /*!
3392  * \internal
3393  * \brief Update resource role etc. after a failed migrate_to action
3394  *
3395  * \param[in,out] history  Parsed action result history
3396  */
3397 static void
3398 unpack_migrate_to_failure(struct action_history *history)
3399 {
3400     xmlNode *target_migrate_from = NULL;
3401     const char *source = NULL;
3402     const char *target = NULL;
3403 
3404     // Get source and target node names from XML
3405     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3406                                  &target) != pcmk_rc_ok) {
3407         return;
3408     }
3409 
3410     /* If a migration failed, we have to assume the resource is active. Clones
3411      * are not allowed to migrate, so role can't be promoted.
3412      */
3413     history->rsc->role = pcmk_role_started;
3414 
3415     // Check for migrate_from on the target
3416     target_migrate_from = find_lrm_op(history->rsc->id,
3417                                       PCMK_ACTION_MIGRATE_FROM, target, source,
3418                                       PCMK_OCF_OK, history->rsc->cluster);
3419 
3420     if (/* If the resource state is unknown on the target, it will likely be
3421          * probed there.
3422          * Don't just consider it running there. We will get back here anyway in
3423          * case the probe detects it's running there.
3424          */
3425         !unknown_on_node(history->rsc, target)
3426         /* If the resource has newer state on the target after the migration
3427          * events, this migrate_to no longer matters for the target.
3428          */
3429         && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3430                                       target_migrate_from,
3431                                       history->rsc->cluster)) {
3432         /* The resource has no newer state on the target, so assume that it
3433          * is still active there, as long as the target node is online (which
3434          * is checked below).
3435          */
3436         pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
3437                                                   target);
3438 
3439         if (target_node && target_node->details->online) {
3440             native_add_running(history->rsc, target_node, history->rsc->cluster,
3441                                FALSE);
3442         }
3443 
3444     } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3445                                   history->rsc->cluster)) {
3446         /* We know the resource has newer state on the target, but this
3447          * migrate_to still matters for the source as long as there's no newer
3448          * non-monitor operation there.
3449          */
3450 
3451         // Mark node as having dangling migration so we can force a stop later
3452         history->rsc->dangling_migrations =
3453             g_list_prepend(history->rsc->dangling_migrations,
3454                            (gpointer) history->node);
3455     }
3456 }
3457 
3458 /*!
3459  * \internal
3460  * \brief Update resource role etc. after a failed migrate_from action
3461  *
3462  * \param[in,out] history  Parsed action result history
3463  */
3464 static void
3465 unpack_migrate_from_failure(struct action_history *history)
3466 {
3467     xmlNode *source_migrate_to = NULL;
3468     const char *source = NULL;
3469     const char *target = NULL;
3470 
3471     // Get source and target node names from XML
3472     if (get_migration_node_names(history->xml, NULL, history->node, &source,
3473                                  &target) != pcmk_rc_ok) {
3474         return;
3475     }
3476 
3477     /* If a migration failed, we have to assume the resource is active. Clones
3478      * are not allowed to migrate, so role can't be promoted.
3479      */
3480     history->rsc->role = pcmk_role_started;
3481 
3482     // Check for a migrate_to on the source
3483     source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3484                                     source, target, PCMK_OCF_OK,
3485                                     history->rsc->cluster);
3486 
3487     if (/* If the resource state is unknown on the source, it will likely be
3488          * probed there.
3489          * Don't just consider it running there. We will get back here anyway in
3490          * case the probe detects it's running there.
3491          */
3492         !unknown_on_node(history->rsc, source)
3493         /* If the resource has newer state on the source after the migration
3494          * events, this migrate_from no longer matters for the source.
3495          */
3496         && !newer_state_after_migrate(history->rsc->id, source,
3497                                       source_migrate_to, history->xml,
3498                                       history->rsc->cluster)) {
3499         /* The resource has no newer state on the source, so assume it's still
3500          * active there (if it is up).
3501          */
3502         pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3503                                                   source);
3504 
3505         if (source_node && source_node->details->online) {
3506             native_add_running(history->rsc, source_node, history->rsc->cluster,
3507                                TRUE);
3508         }
3509     }
3510 }
3511 
3512 /*!
3513  * \internal
3514  * \brief Add an action to cluster's list of failed actions
3515  *
3516  * \param[in,out] history  Parsed action result history
3517  */
3518 static void
3519 record_failed_op(struct action_history *history)
3520 {
3521     if (!(history->node->details->online)) {
3522         return;
3523     }
3524 
3525     for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3526          xIter != NULL; xIter = xIter->next) {
3527 
3528         const char *key = pcmk__xe_history_key(xIter);
3529         const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3530 
3531         if (pcmk__str_eq(history->key, key, pcmk__str_none)
3532             && pcmk__str_eq(uname, history->node->details->uname,
3533                             pcmk__str_casei)) {
3534             crm_trace("Skipping duplicate entry %s on %s",
3535                       history->key, pcmk__node_name(history->node));
3536             return;
3537         }
3538     }
3539 
3540     crm_trace("Adding entry for %s on %s to failed action list",
3541               history->key, pcmk__node_name(history->node));
3542     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
3543     crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3544     pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
3545 }
3546 
3547 static char *
3548 last_change_str(const xmlNode *xml_op)
3549 {
3550     time_t when;
3551     char *result = NULL;
3552 
3553     if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
3554                                 &when) == pcmk_ok) {
3555         char *when_s = pcmk__epoch2str(&when, 0);
3556         const char *p = strchr(when_s, ' ');
3557 
3558         // Skip day of week to make message shorter
3559         if ((p != NULL) && (*(++p) != '\0')) {
3560             result = pcmk__str_copy(p);
3561         }
3562         free(when_s);
3563     }
3564 
3565     if (result == NULL) {
3566         result = pcmk__str_copy("unknown_time");
3567     }
3568 
3569     return result;
3570 }
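     /* The returned string is the PCMK_XA_LAST_RC_CHANGE timestamp rendered in
      * human-readable form with the leading day of week stripped; for example,
      * an epoch formatted as "Wed Jun 30 21:49:08 1993" would presumably come
      * back as "Jun 30 21:49:08 1993" (the exact format depends on
      * pcmk__epoch2str()), or "unknown_time" if the attribute is missing or
      * unparsable.
      */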
3571 
3572 /*!
3573  * \internal
3574  * \brief Compare two on-fail values
3575  *
3576  * \param[in] first   One on-fail value to compare
3577  * \param[in] second  The other on-fail value to compare
3578  *
3579  * \return A negative number if second is more severe than first, zero if they
3580  *         are equal, or a positive number if first is more severe than second.
3581  * \note This is only needed until the action_fail_response values can be
3582  *       renumbered at the next API compatibility break.
3583  */
3584 static int
3585 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3586 {
3587     switch (first) {
3588         case pcmk_on_fail_demote:
3589             switch (second) {
3590                 case pcmk_on_fail_ignore:
3591                     return 1;
3592                 case pcmk_on_fail_demote:
3593                     return 0;
3594                 default:
3595                     return -1;
3596             }
3597             break;
3598 
3599         case pcmk_on_fail_reset_remote:
3600             switch (second) {
3601                 case pcmk_on_fail_ignore:
3602                 case pcmk_on_fail_demote:
3603                 case pcmk_on_fail_restart:
3604                     return 1;
3605                 case pcmk_on_fail_reset_remote:
3606                     return 0;
3607                 default:
3608                     return -1;
3609             }
3610             break;
3611 
3612         case pcmk_on_fail_restart_container:
3613             switch (second) {
3614                 case pcmk_on_fail_ignore:
3615                 case pcmk_on_fail_demote:
3616                 case pcmk_on_fail_restart:
3617                 case pcmk_on_fail_reset_remote:
3618                     return 1;
3619                 case pcmk_on_fail_restart_container:
3620                     return 0;
3621                 default:
3622                     return -1;
3623             }
3624             break;
3625 
3626         default:
3627             break;
3628     }
3629     switch (second) {
3630         case pcmk_on_fail_demote:
3631             return (first == pcmk_on_fail_ignore)? -1 : 1;
3632 
3633         case pcmk_on_fail_reset_remote:
3634             switch (first) {
3635                 case pcmk_on_fail_ignore:
3636                 case pcmk_on_fail_demote:
3637                 case pcmk_on_fail_restart:
3638                     return -1;
3639                 default:
3640                     return 1;
3641             }
3642             break;
3643 
3644         case pcmk_on_fail_restart_container:
3645             switch (first) {
3646                 case pcmk_on_fail_ignore:
3647                 case pcmk_on_fail_demote:
3648                 case pcmk_on_fail_restart:
3649                 case pcmk_on_fail_reset_remote:
3650                     return -1;
3651                 default:
3652                     return 1;
3653             }
3654             break;
3655 
3656         default:
3657             break;
3658     }
3659     return first - second;
3660 }
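     /* The explicit cases above slot the values that are out of order in the
      * enum into a severity ranking: demote sits just above ignore,
      * reset-remote above restart, and restart-container above reset-remote;
      * any pair not covered explicitly falls back to the enum's numeric order
      * via (first - second).
      */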
3661 
3662 /*!
3663  * \internal
3664  * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3665  *
3666  * \param[in,out] rsc  Resource to ban
3667  */
3668 static void
3669 ban_from_all_nodes(pcmk_resource_t *rsc)
3670 {
3671     int score = -PCMK_SCORE_INFINITY;
3672     pcmk_resource_t *fail_rsc = rsc;
3673 
3674     if (fail_rsc->parent != NULL) {
3675         pcmk_resource_t *parent = uber_parent(fail_rsc);
3676 
3677         if (pcmk__is_anonymous_clone(parent)) {
3678             /* For anonymous clones, if an operation with
3679              * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3680              * entire clone must stop.
3681              */
3682             fail_rsc = parent;
3683         }
3684     }
3685 
3686     // Ban the resource from all nodes
3687     crm_notice("%s will not be started under current conditions", fail_rsc->id);
3688     if (fail_rsc->allowed_nodes != NULL) {
3689         g_hash_table_destroy(fail_rsc->allowed_nodes);
3690     }
3691     fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3692     g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3693 }
3694 
3695 /*!
3696  * \internal
3697  * \brief Get configured failure handling and role after failure for an action
3698  *
3699  * \param[in,out] history    Unpacked action history entry
3700  * \param[out]    on_fail    Where to set configured failure handling
3701  * \param[out]    fail_role  Where to set role after failure
3702  */
3703 static void
3704 unpack_failure_handling(struct action_history *history,
3705                         enum action_fail_response *on_fail,
3706                         enum rsc_role_e *fail_role)
3707 {
3708     xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3709                                                history->interval_ms, true);
3710 
3711     GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3712                                                 history->task,
3713                                                 history->interval_ms, config);
3714 
3715     const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3716 
3717     *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3718                                    history->interval_ms, on_fail_str);
3719     *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3720                                           meta);
3721     g_hash_table_destroy(meta);
3722 }
3723 
3724 /*!
3725  * \internal
3726  * \brief Update resource role, failure handling, etc., after a failed action
3727  *
3728  * \param[in,out] history         Parsed action result history
3729  * \param[in]     config_on_fail  Action failure handling from configuration
3730  * \param[in]     fail_role       Resource's role after failure of this action
3731  * \param[out]    last_failure    This will be set to the history XML
3732  * \param[in,out] on_fail         Actual handling of action result
3733  */
3734 static void
3735 unpack_rsc_op_failure(struct action_history *history,
3736                       enum action_fail_response config_on_fail,
3737                       enum rsc_role_e fail_role, xmlNode **last_failure,
3738                       enum action_fail_response *on_fail)
3739 {
3740     bool is_probe = false;
3741     char *last_change_s = NULL;
3742 
3743     *last_failure = history->xml;
3744 
3745     is_probe = pcmk_xe_is_probe(history->xml);
3746     last_change_s = last_change_str(history->xml);
3747 
3748     if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3749         && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3750         crm_trace("Unexpected result (%s%s%s) was recorded for "
3751                   "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3752                   services_ocf_exitcode_str(history->exit_status),
3753                   (pcmk__str_empty(history->exit_reason)? "" : ": "),
3754                   pcmk__s(history->exit_reason, ""),
3755                   (is_probe? "probe" : history->task), history->rsc->id,
3756                   pcmk__node_name(history->node), last_change_s,
3757                   history->exit_status, history->id);
3758     } else {
3759         pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
3760                          "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3761                          services_ocf_exitcode_str(history->exit_status),
3762                          (pcmk__str_empty(history->exit_reason)? "" : ": "),
3763                          pcmk__s(history->exit_reason, ""),
3764                          (is_probe? "probe" : history->task), history->rsc->id,
3765                          pcmk__node_name(history->node), last_change_s,
3766                          history->exit_status, history->id);
3767 
3768         if (is_probe && (history->exit_status != PCMK_OCF_OK)
3769             && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3770             && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3771 
3772             /* A failed (not just unexpected) probe result could mean the user
3773              * didn't know resources would be probed even where they can't run.
3774              */
3775             crm_notice("If it is not possible for %s to run on %s, see "
3776                        "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3777                        "constraints",
3778                        history->rsc->id, pcmk__node_name(history->node));
3779         }
3780 
3781         record_failed_op(history);
3782     }
3783 
3784     free(last_change_s);
3785 
3786     if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3787         pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3788                         pcmk_on_fail_text(*on_fail),
3789                         pcmk_on_fail_text(config_on_fail), history->key);
3790         *on_fail = config_on_fail;
3791     }
3792 
3793     if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3794         resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3795                           "__stop_fail__", history->rsc->cluster);
3796 
3797     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3798         unpack_migrate_to_failure(history);
3799 
3800     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3801         unpack_migrate_from_failure(history);
3802 
3803     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3804         history->rsc->role = pcmk_role_promoted;
3805 
3806     } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3807         if (config_on_fail == pcmk_on_fail_block) {
3808             history->rsc->role = pcmk_role_promoted;
3809             pe__set_next_role(history->rsc, pcmk_role_stopped,
3810                               "demote with " PCMK_META_ON_FAIL "=block");
3811 
3812         } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3813             history->rsc->role = pcmk_role_stopped;
3814 
3815         } else {
3816             /* Staying in the promoted role would put the scheduler and
3817              * controller into a loop. Setting the role to unpromoted is not
3818              * dangerous because the resource will be stopped as part of
3819              * recovery, and any promotion will be ordered after that stop.
3820              */
3821             history->rsc->role = pcmk_role_unpromoted;
3822         }
3823     }
3824 
3825     if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3826         /* leave stopped */
3827         pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3828         history->rsc->role = pcmk_role_stopped;
3829 
3830     } else if (history->rsc->role < pcmk_role_started) {
3831         pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3832         set_active(history->rsc);
3833     }
3834 
3835     pcmk__rsc_trace(history->rsc,
3836                     "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3837                     history->rsc->id, pcmk_role_text(history->rsc->role),
3838                     pcmk__btoa(history->node->details->unclean),
3839                     pcmk_on_fail_text(config_on_fail),
3840                     pcmk_role_text(fail_role));
3841 
3842     if ((fail_role != pcmk_role_started)
3843         && (history->rsc->next_role < fail_role)) {
3844         pe__set_next_role(history->rsc, fail_role, "failure");
3845     }
3846 
3847     if (fail_role == pcmk_role_stopped) {
3848         ban_from_all_nodes(history->rsc);
3849     }
3850 }
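     /* Summarizing the per-action handling above: a failed stop bans the
      * resource from that node, failed migrate_to/migrate_from results are
      * handled by their dedicated helpers, a failed promote still counts as
      * promoted, and a failed demote ends up promoted, stopped, or unpromoted
      * depending on on-fail=block and the exit status. The configured
      * fail_role is then applied as the next role where appropriate, and
      * fail_role == Stopped bans the resource (or its anonymous clone) from
      * every node.
      */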
3851 
3852 /*!
3853  * \internal
3854  * \brief Block a resource with a failed action if it cannot be recovered
3855  *
3856  * If resource action is a failed stop and fencing is not possible, mark the
3857  * resource as unmanaged and blocked, since recovery cannot be done.
3858  *
3859  * \param[in,out] history  Parsed action history entry
3860  */
3861 static void
3862 block_if_unrecoverable(struct action_history *history)
3863 {
3864     char *last_change_s = NULL;
3865 
3866     if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3867         return; // All actions besides stop are always recoverable
3868     }
3869     if (pe_can_fence(history->node->details->data_set, history->node)) {
3870         return; // Failed stops are recoverable via fencing
3871     }
3872 
3873     last_change_s = last_change_str(history->xml);
3874     pcmk__sched_err("No further recovery can be attempted for %s "
3875                     "because %s on %s failed (%s%s%s) at %s "
3876                     CRM_XS " rc=%d id=%s",
3877                     history->rsc->id, history->task,
3878                     pcmk__node_name(history->node),
3879                     services_ocf_exitcode_str(history->exit_status),
3880                     (pcmk__str_empty(history->exit_reason)? "" : ": "),
3881                     pcmk__s(history->exit_reason, ""),
3882                     last_change_s, history->exit_status, history->id);
3883 
3884     free(last_change_s);
3885 
3886     pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_managed);
3887     pcmk__set_rsc_flags(history->rsc, pcmk_rsc_blocked);
3888 }
3889 
3890 /*!
3891  * \internal
3892  * \brief Update action history's execution status and why
3893  *
3894  * \param[in,out] history  Parsed action history entry
3895  * \param[out]    why      Where to store reason for update
3896  * \param[in]     value    New value
3897  * \param[in]     reason   Description of why value was changed
3898  */
3899 static inline void
3900 remap_because(struct action_history *history, const char **why, int value,
3901               const char *reason)
3902 {
3903     if (history->execution_status != value) {
3904         history->execution_status = value;
3905         *why = reason;
3906     }
3907 }
3908 
3909 /*!
3910  * \internal
3911  * \brief Remap informational monitor results and operation status
3912  *
3913  * For the monitor results, certain OCF codes are for providing extended information
3914  * to the user about services that aren't yet failed but not entirely healthy either.
3915  * These must be treated as the "normal" result by Pacemaker.
3916  *
3917  * For operation status, the action result can be used to determine an appropriate
3918  * status for the purposes of responding to the action.  The status provided by the
3919  * executor is not directly usable since the executor does not know what was expected.
3920  *
3921  * \param[in,out] history  Parsed action history entry
3922  * \param[in,out] on_fail  What should be done about the result
3923  * \param[in]     expired  Whether result is expired
3924  *
3925  * \note If the result is remapped and the node is not shutting down or failed,
3926  *       the operation will be recorded in the scheduler data's list of failed
3927  *       operations to highlight it for the user.
3928  *
3929  * \note This may update the resource's current and next role.
3930  */
3931 static void
3932 remap_operation(struct action_history *history,
3933                 enum action_fail_response *on_fail, bool expired)
3934 {
3935     bool is_probe = false;
3936     int orig_exit_status = history->exit_status;
3937     int orig_exec_status = history->execution_status;
3938     const char *why = NULL;
3939     const char *task = history->task;
3940 
3941     // Remap degraded results to their successful counterparts
3942     history->exit_status = pcmk__effective_rc(history->exit_status);
3943     if (history->exit_status != orig_exit_status) {
3944         why = "degraded result";
3945         if (!expired && (!history->node->details->shutdown
3946                          || history->node->details->online)) {
3947             record_failed_op(history);
3948         }
3949     }
3950 
3951     if (!pcmk__is_bundled(history->rsc)
3952         && pcmk_xe_mask_probe_failure(history->xml)
3953         && ((history->execution_status != PCMK_EXEC_DONE)
3954             || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3955         history->execution_status = PCMK_EXEC_DONE;
3956         history->exit_status = PCMK_OCF_NOT_RUNNING;
3957         why = "equivalent probe result";
3958     }
3959 
3960     /* If the executor reported an execution status of anything but done or
3961      * error, consider that final. But for done or error, we know better whether
3962      * it should be treated as a failure or not, because we know the expected
3963      * result.
3964      */
3965     switch (history->execution_status) {
3966         case PCMK_EXEC_DONE:
3967         case PCMK_EXEC_ERROR:
3968             break;
3969 
3970         // These should be treated as node-fatal
3971         case PCMK_EXEC_NO_FENCE_DEVICE:
3972         case PCMK_EXEC_NO_SECRETS:
3973             remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3974                           "node-fatal error");
3975             goto remap_done;
3976 
3977         default:
3978             goto remap_done;
3979     }
3980 
3981     is_probe = pcmk_xe_is_probe(history->xml);
3982     if (is_probe) {
3983         task = "probe";
3984     }
3985 
3986     if (history->expected_exit_status < 0) {
3987         /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3988          * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3989          * expected exit status in the transition key, which (along with the
3990          * similar case of a corrupted transition key in the CIB) will be
3991          * reported to this function as -1. Pacemaker 2.0+ does not support
3992          * rolling upgrades from those versions or processing of saved CIB files
3993          * from those versions, so we do not need to care much about this case.
3994          */
3995         remap_because(history, &why, PCMK_EXEC_ERROR,
3996                       "obsolete history format");
3997         pcmk__config_warn("Expected result not found for %s on %s "
3998                           "(corrupt or obsolete CIB?)",
3999                           history->key, pcmk__node_name(history->node));
4000 
4001     } else if (history->exit_status == history->expected_exit_status) {
4002         remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
4003 
4004     } else {
4005         remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
4006         pcmk__rsc_debug(history->rsc,
4007                         "%s on %s: expected %d (%s), got %d (%s%s%s)",
4008                         history->key, pcmk__node_name(history->node),
4009                         history->expected_exit_status,
4010                         services_ocf_exitcode_str(history->expected_exit_status),
4011                         history->exit_status,
4012                         services_ocf_exitcode_str(history->exit_status),
4013                         (pcmk__str_empty(history->exit_reason)? "" : ": "),
4014                         pcmk__s(history->exit_reason, ""));
4015     }
4016 
4017     switch (history->exit_status) {
4018         case PCMK_OCF_OK:
4019             if (is_probe
4020                 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
4021                 char *last_change_s = last_change_str(history->xml);
4022 
4023                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4024                 pcmk__rsc_info(history->rsc,
4025                                "Probe found %s active on %s at %s",
4026                                history->rsc->id, pcmk__node_name(history->node),
4027                                last_change_s);
4028                 free(last_change_s);
4029             }
4030             break;
4031 
4032         case PCMK_OCF_NOT_RUNNING:
4033             if (is_probe
4034                 || (history->expected_exit_status == history->exit_status)
4035                 || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
4036 
4037                 /* For probes, recurring monitors for the Stopped role, and
4038                  * unmanaged resources, "not running" is not considered a
4039                  * failure.
4040                  */
4041                 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4042                 history->rsc->role = pcmk_role_stopped;
4043                 *on_fail = pcmk_on_fail_ignore;
4044                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4045                                   "not running");
4046             }
4047             break;
4048 
4049         case PCMK_OCF_RUNNING_PROMOTED:
4050             if (is_probe
4051                 && (history->exit_status != history->expected_exit_status)) {
4052                 char *last_change_s = last_change_str(history->xml);
4053 
4054                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4055                 pcmk__rsc_info(history->rsc,
4056                                "Probe found %s active and promoted on %s at %s",
4057                                 history->rsc->id,
4058                                 pcmk__node_name(history->node), last_change_s);
4059                 free(last_change_s);
4060             }
4061             if (!expired
4062                 || (history->exit_status == history->expected_exit_status)) {
4063                 history->rsc->role = pcmk_role_promoted;
4064             }
4065             break;
4066 
4067         case PCMK_OCF_FAILED_PROMOTED:
4068             if (!expired) {
4069                 history->rsc->role = pcmk_role_promoted;
4070             }
4071             remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4072             break;
4073 
4074         case PCMK_OCF_NOT_CONFIGURED:
4075             remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4076             break;
4077 
4078         case PCMK_OCF_UNIMPLEMENT_FEATURE:
4079             {
4080                 guint interval_ms = 0;
4081                 crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
4082                                      &interval_ms);
4083 
4084                 if (interval_ms == 0) {
4085                     if (!expired) {
4086                         block_if_unrecoverable(history);
4087                     }
4088                     remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4089                                   "exit status");
4090                 } else {
4091                     remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4092                                   "exit status");
4093                 }
4094             }
4095             break;
4096 
4097         case PCMK_OCF_NOT_INSTALLED:
4098         case PCMK_OCF_INVALID_PARAM:
4099         case PCMK_OCF_INSUFFICIENT_PRIV:
4100             if (!expired) {
4101                 block_if_unrecoverable(history);
4102             }
4103             remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4104             break;
4105 
4106         default:
4107             if (history->execution_status == PCMK_EXEC_DONE) {
4108                 char *last_change_s = last_change_str(history->xml);
4109 
4110                 crm_info("Treating unknown exit status %d from %s of %s "
4111                          "on %s at %s as failure",
4112                          history->exit_status, task, history->rsc->id,
4113                          pcmk__node_name(history->node), last_change_s);
4114                 remap_because(history, &why, PCMK_EXEC_ERROR,
4115                               "unknown exit status");
4116                 free(last_change_s);
4117             }
4118             break;
4119     }
4120 
4121 remap_done:
4122     if (why != NULL) {
4123         pcmk__rsc_trace(history->rsc,
4124                         "Remapped %s result from [%s: %s] to [%s: %s] "
4125                         "because of %s",
4126                         history->key, pcmk_exec_status_str(orig_exec_status),
4127                         crm_exit_str(orig_exit_status),
4128                         pcmk_exec_status_str(history->execution_status),
4129                         crm_exit_str(history->exit_status), why);
4130     }
4131 }
4132 
4133 // Return TRUE if a start or monitor's last failure should be cleared due to changed parameters
4134 static bool
4135 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4136                               pcmk_resource_t *rsc, pcmk_node_t *node)
4137 {
4138     if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4139         if (pe__bundle_needs_remote_name(rsc)) {
4140             /* We haven't allocated resources yet, so we can't reliably
4141              * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4142              * When that's needed, defer the check until later.
4143              */
4144             pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4145                                 rsc->cluster);
4146 
4147         } else {
4148             pcmk__op_digest_t *digest_data = NULL;
4149 
4150             digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4151                                                 rsc->cluster);
4152             switch (digest_data->rc) {
4153                 case pcmk__digest_unknown:
4154                     crm_trace("Resource %s history entry %s on %s"
4155                               " has no digest to compare",
4156                               rsc->id, pcmk__xe_history_key(xml_op),
4157                               node->details->id);
4158                     break;
4159                 case pcmk__digest_match:
4160                     break;
4161                 default:
4162                     return TRUE;
4163             }
4164         }
4165     }
4166     return FALSE;
4167 }
4168 
4169 // Order action after fencing of remote node, given connection rsc
4170 static void
4171 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4172                            pcmk_scheduler_t *scheduler)
4173 {
4174     pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4175 
4176     if (remote_node) {
4177         pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4178                                            FALSE, scheduler);
4179 
4180         order_actions(fence, action, pcmk__ar_first_implies_then);
4181     }
4182 }
4183 
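     /*!
      * \internal
      * \brief Check whether a failure timeout should be ignored for an action
      *
      * \param[in] rsc              Resource that action is for
      * \param[in] task             Action name
      * \param[in] interval_ms      Action interval (in milliseconds)
      * \param[in] is_last_failure  Whether this entry is a last_failure entry
      *
      * \return true if the failure timeout should not expire this entry,
      *         otherwise false
      */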
4184 static bool
4185 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4186                               guint interval_ms, bool is_last_failure)
4187 {
4188     /* Clearing failures of recurring monitors has special concerns. The
4189      * executor reports only changes in the monitor result, so if the
4190      * monitor is still active and still getting the same failure result,
4191      * that will go undetected after the failure is cleared.
4192      *
4193      * Also, the operation history will have the time when the recurring
4194      * monitor result changed to the given code, not the time when the
4195      * result last happened.
4196      *
4197      * @TODO We probably should clear such failures only when the failure
4198      * timeout has passed since the last occurrence of the failed result.
4199      * However we don't record that information. We could maybe approximate
4200      * that by clearing only if there is a more recent successful monitor or
4201      * stop result, but we don't even have that information at this point
4202      * since we are still unpacking the resource's operation history.
4203      *
4204      * This is especially important for remote connection resources with a
4205      * reconnect interval, so in that case, we skip clearing failures
4206      * if the remote node hasn't been fenced.
4207      */
4208     if (rsc->remote_reconnect_ms
4209         && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
4210         && (interval_ms != 0)
4211         && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4212 
4213         pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
4214 
4215         if (remote_node && !remote_node->details->remote_was_fenced) {
4216             if (is_last_failure) {
4217                 crm_info("Waiting to clear monitor failure for remote node %s"
4218                          " until fencing has occurred", rsc->id);
4219             }
4220             return TRUE;
4221         }
4222     }
4223     return FALSE;
4224 }
4225 
4226 /*!
4227  * \internal
4228  * \brief Check operation age and schedule failure clearing when appropriate
4229  *
4230  * This function has two distinct purposes. The first is to check whether an
4231  * operation history entry is expired (i.e. the resource has a failure timeout,
4232  * the entry is older than the timeout, and the resource either has no fail
4233  * count or its fail count is entirely older than the timeout). The second is to
4234  * schedule fail count clearing when appropriate (i.e. the operation is expired
4235  * and either the resource has an expired fail count or the operation is a
4236  * last_failure for a remote connection resource with a reconnect interval,
4237  * or the operation is a last_failure for a start or monitor operation and the
4238  * resource's parameters have changed since the operation).
4239  *
4240  * \param[in,out] history  Parsed action result history
4241  *
4242  * \return true if operation history entry is expired, otherwise false
4243  */
4244 static bool
4245 check_operation_expiry(struct action_history *history)
4246 {
4247     bool expired = false;
4248     bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4249     time_t last_run = 0;
4250     int unexpired_fail_count = 0;
4251     const char *clear_reason = NULL;
4252 
4253     if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4254         pcmk__rsc_trace(history->rsc,
4255                         "Resource history entry %s on %s is not expired: "
4256                         "Not Installed does not expire",
4257                         history->id, pcmk__node_name(history->node));
4258         return false; // "Not installed" must always be cleared manually
4259     }
4260 
4261     if ((history->rsc->failure_timeout > 0)
4262         && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
4263                                     &last_run) == 0)) {
4264 
4265         /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4266          * timestamp
4267          */
4268 
4269         time_t now = get_effective_time(history->rsc->cluster);
4270         time_t last_failure = 0;
4271 
4272         // Is this particular operation history older than the failure timeout?
4273         if ((now >= (last_run + history->rsc->failure_timeout))
4274             && !should_ignore_failure_timeout(history->rsc, history->task,
4275                                               history->interval_ms,
4276                                               is_last_failure)) {
4277             expired = true;
4278         }
4279 
4280         // Does the resource as a whole have an unexpired fail count?
4281         unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4282                                                 &last_failure,
4283                                                 pcmk__fc_effective,
4284                                                 history->xml);
4285 
4286         // Update scheduler recheck time according to *last* failure
4287         crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4288                   " last-failure@%lld",
4289                   history->id, (long long) last_run, (expired? "" : "not "),
4290                   (long long) now, unexpired_fail_count,
4291                   history->rsc->failure_timeout, (long long) last_failure);
4292         last_failure += history->rsc->failure_timeout + 1;
4293         if (unexpired_fail_count && (now < last_failure)) {
4294             pe__update_recheck_time(last_failure, history->rsc->cluster,
4295                                     "fail count expiration");
4296         }
4297     }
4298 
4299     if (expired) {
4300         if (pe_get_failcount(history->node, history->rsc, NULL,
4301                              pcmk__fc_default, history->xml)) {
4302             // There is a fail count ignoring timeout
4303 
4304             if (unexpired_fail_count == 0) {
4305                 // There is no fail count considering timeout
4306                 clear_reason = "it expired";
4307 
4308             } else {
4309                 /* This operation is old, but there is an unexpired fail count.
4310                  * In a properly functioning cluster, this should only be
4311                  * possible if this operation is not a failure (otherwise the
4312                  * fail count should be expired too), so this is really just a
4313                  * failsafe.
4314                  */
4315                 pcmk__rsc_trace(history->rsc,
4316                                 "Resource history entry %s on %s is not "
4317                                 "expired: Unexpired fail count",
4318                                 history->id, pcmk__node_name(history->node));
4319                 expired = false;
4320             }
4321 
4322         } else if (is_last_failure
4323                    && (history->rsc->remote_reconnect_ms != 0)) {
4324             /* Clear any expired last failure when reconnect interval is set,
4325              * even if there is no fail count.
4326              */
4327             clear_reason = "reconnect interval is set";
4328         }
4329     }
4330 
4331     if (!expired && is_last_failure
4332         && should_clear_for_param_change(history->xml, history->task,
4333                                          history->rsc, history->node)) {
4334         clear_reason = "resource parameters have changed";
4335     }
4336 
4337     if (clear_reason != NULL) {
4338         pcmk_action_t *clear_op = NULL;
4339 
4340         // Schedule clearing of the fail count
4341         clear_op = pe__clear_failcount(history->rsc, history->node,
4342                                        clear_reason, history->rsc->cluster);
4343 
4344         if (pcmk_is_set(history->rsc->cluster->flags,
4345                         pcmk_sched_fencing_enabled)
4346             && (history->rsc->remote_reconnect_ms != 0)) {
4347             /* If we're clearing a remote connection due to a reconnect
4348              * interval, we want to wait until any scheduled fencing
4349              * completes.
4350              *
4351              * We could limit this to remote_node->details->unclean, but at
4352              * this point, that's always true (it won't be reliable until
4353              * after unpack_node_history() is done).
4354              */
4355             crm_info("Clearing %s failure will wait until any scheduled "
4356                      "fencing of %s completes",
4357                      history->task, history->rsc->id);
4358             order_after_remote_fencing(clear_op, history->rsc,
4359                                        history->rsc->cluster);
4360         }
4361     }
4362 
4363     if (expired && (history->interval_ms == 0)
4364         && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4365         switch (history->exit_status) {
4366             case PCMK_OCF_OK:
4367             case PCMK_OCF_NOT_RUNNING:
4368             case PCMK_OCF_RUNNING_PROMOTED:
4369             case PCMK_OCF_DEGRADED:
4370             case PCMK_OCF_DEGRADED_PROMOTED:
4371                 // Don't expire probes that return these values
4372                 pcmk__rsc_trace(history->rsc,
4373                                 "Resource history entry %s on %s is not "
4374                                 "expired: Probe result",
4375                              history->id, pcmk__node_name(history->node));
4376                 expired = false;
4377                 break;
4378         }
4379     }
4380 
4381     return expired;
4382 }
4383 
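     /*!
      * \internal
      * \brief Get the expected result from an action history entry
      *
      * \param[in] xml_op  Action history entry XML
      *
      * \return Expected exit status parsed from the entry's transition key, or
      *         -1 if the entry has no transition key
      */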
4384 int
4385 pe__target_rc_from_xml(const xmlNode *xml_op)
4386 {
4387     int target_rc = 0;
4388     const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4389 
4390     if (key == NULL) {
4391         return -1;
4392     }
4393     decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4394     return target_rc;
4395 }
4396 
4397 /*!
4398  * \internal
4399  * \brief Update a resource's state for an action result
4400  *
4401  * \param[in,out] history       Parsed action history entry
4402  * \param[in]     exit_status   Exit status to base new state on
4403  * \param[in]     last_failure  Resource's last_failure entry, if known
4404  * \param[in,out] on_fail       Resource's current failure handling
4405  */
4406 static void
4407 update_resource_state(struct action_history *history, int exit_status,
4408                       const xmlNode *last_failure,
4409                       enum action_fail_response *on_fail)
4410 {
4411     bool clear_past_failure = false;
4412 
4413     if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4414         || (!pcmk__is_bundled(history->rsc)
4415             && pcmk_xe_mask_probe_failure(history->xml))) {
4416         history->rsc->role = pcmk_role_stopped;
4417 
4418     } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4419         clear_past_failure = true;
4420 
4421     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4422                             pcmk__str_none)) {
4423         if ((last_failure != NULL)
4424             && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4425                             pcmk__str_none)) {
4426             clear_past_failure = true;
4427         }
4428         if (history->rsc->role < pcmk_role_started) {
4429             set_active(history->rsc);
4430         }
4431 
4432     } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4433         history->rsc->role = pcmk_role_started;
4434         clear_past_failure = true;
4435 
4436     } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4437         history->rsc->role = pcmk_role_stopped;
4438         clear_past_failure = true;
4439 
4440     } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4441                             pcmk__str_none)) {
4442         history->rsc->role = pcmk_role_promoted;
4443         clear_past_failure = true;
4444 
4445     } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4446                             pcmk__str_none)) {
4447         if (*on_fail == pcmk_on_fail_demote) {
4448             /* Demote clears an error only if
4449              * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4450              */
4451             clear_past_failure = true;
4452         }
4453         history->rsc->role = pcmk_role_unpromoted;
4454 
4455     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4456                             pcmk__str_none)) {
4457         history->rsc->role = pcmk_role_started;
4458         clear_past_failure = true;
4459 
4460     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4461                             pcmk__str_none)) {
4462         unpack_migrate_to_success(history);
4463 
4464     } else if (history->rsc->role < pcmk_role_started) {
4465         pcmk__rsc_trace(history->rsc, "%s active on %s",
4466                         history->rsc->id, pcmk__node_name(history->node));
4467         set_active(history->rsc);
4468     }
4469 
4470     if (!clear_past_failure) {
4471         return;
4472     }
4473 
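         // A completed action may relax how an earlier failure is handled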
4474     switch (*on_fail) {
4475         case pcmk_on_fail_stop:
4476         case pcmk_on_fail_ban:
4477         case pcmk_on_fail_standby_node:
4478         case pcmk_on_fail_fence_node:
4479             pcmk__rsc_trace(history->rsc,
4480                             "%s (%s) is not cleared by a completed %s",
4481                             history->rsc->id, pcmk_on_fail_text(*on_fail),
4482                             history->task);
4483             break;
4484 
4485         case pcmk_on_fail_block:
4486         case pcmk_on_fail_ignore:
4487         case pcmk_on_fail_demote:
4488         case pcmk_on_fail_restart:
4489         case pcmk_on_fail_restart_container:
4490             *on_fail = pcmk_on_fail_ignore;
4491             pe__set_next_role(history->rsc, pcmk_role_unknown,
4492                               "clear past failures");
4493             break;
4494 
4495         case pcmk_on_fail_reset_remote:
4496             if (history->rsc->remote_reconnect_ms == 0) {
4497                 /* With no reconnect interval, the connection is allowed to
4498                  * start again after the remote node is fenced and
4499                  * completely stopped. (With a reconnect interval, we wait
4500                  * for the failure to be cleared entirely before attempting
4501                  * to reconnect.)
4502                  */
4503                 *on_fail = pcmk_on_fail_ignore;
4504                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4505                                   "clear past failures and reset remote");
4506             }
4507             break;
4508     }
4509 }
4510 
4511 /*!
4512  * \internal
4513  * \brief Check whether a given history entry matters for resource state
4514  *
4515  * \param[in] history  Parsed action history entry
4516  *
4517  * \return true if action can affect resource state, otherwise false
4518  */
4519 static inline bool
4520 can_affect_state(struct action_history *history)
4521 {
4522 #if 0
4523     /* @COMPAT It might be better to parse only actions we know we're interested
4524      * in, rather than exclude a couple we don't. However that would be a
4525      * behavioral change that should be done at a major or minor series release.
4526      * Currently, unknown operations can affect whether a resource is considered
4527      * active and/or failed.
4528      */
4529      return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4530                              PCMK_ACTION_START, PCMK_ACTION_STOP,
4531                              PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4532                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4533                              "asyncmon", NULL);
4534 #else
4535      return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4536                               PCMK_ACTION_META_DATA, NULL);
4537 #endif
4538 }
4539 
4540 /*!
4541  * \internal
4542  * \brief Unpack execution/exit status and exit reason from a history entry
4543  *
4544  * \param[in,out] history  Action history entry to unpack
4545  *
4546  * \return Standard Pacemaker return code
4547  */
4548 static int
4549 unpack_action_result(struct action_history *history)
4550 {
4551     if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4552                                &(history->execution_status)) < 0)
4553         || (history->execution_status < PCMK_EXEC_PENDING)
4554         || (history->execution_status > PCMK_EXEC_MAX)
4555         || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4556         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4557                          "with invalid " PCMK__XA_OP_STATUS " '%s'",
4558                          history->id, history->rsc->id,
4559                          pcmk__node_name(history->node),
4560                          pcmk__s(crm_element_value(history->xml,
4561                                                    PCMK__XA_OP_STATUS),
4562                                  ""));
4563         return pcmk_rc_unpack_error;
4564     }
4565     if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4566                                &(history->exit_status)) < 0)
4567         || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4568 #if 0
4569         /* @COMPAT We should ignore malformed entries, but since that would
4570          * change behavior, it should be done at a major or minor series
4571          * release.
4572          */
4573         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4574                          "with invalid " PCMK__XA_RC_CODE " '%s'",
4575                          history->id, history->rsc->id,
4576                          pcmk__node_name(history->node),
4577                          pcmk__s(crm_element_value(history->xml,
4578                                                    PCMK__XA_RC_CODE),
4579                                  ""));
4580         return pcmk_rc_unpack_error;
4581 #else
4582         history->exit_status = CRM_EX_ERROR;
4583 #endif
4584     }
4585     history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4586     return pcmk_rc_ok;
4587 }
4588 
4589 /*!
4590  * \internal
4591  * \brief Process an action history entry whose result expired
4592  *
4593  * \param[in,out] history           Parsed action history entry
4594  * \param[in]     orig_exit_status  Action exit status before remapping
4595  *
4596  * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4597  *         entry needs no further processing)
4598  */
4599 static int
4600 process_expired_result(struct action_history *history, int orig_exit_status)
4601 {
4602     if (!pcmk__is_bundled(history->rsc)
4603         && pcmk_xe_mask_probe_failure(history->xml)
4604         && (orig_exit_status != history->expected_exit_status)) {
4605 
4606         if (history->rsc->role <= pcmk_role_stopped) {
4607             history->rsc->role = pcmk_role_unknown;
4608         }
4609         crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4610                   "Masked failure expired",
4611                   history->id, history->rsc->id,
4612                   pcmk__node_name(history->node));
4613         return pcmk_rc_ok;
4614     }
4615 
4616     if (history->exit_status == history->expected_exit_status) {
4617         return pcmk_rc_undetermined; // Only failures expire
4618     }
4619 
4620     if (history->interval_ms == 0) {
4621         crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4622                    "Expired failure",
4623                    history->id, history->task, history->rsc->id,
4624                    pcmk__node_name(history->node));
4625         return pcmk_rc_ok;
4626     }
4627 
4628     if (history->node->details->online && !history->node->details->unclean) {
4629         /* Reschedule the recurring action. schedule_cancel() won't work at
4630          * this stage, so as a hacky workaround, forcibly change the restart
4631          * digest so pcmk__check_action_config() does what we want later.
4632          *
4633          * @TODO We should skip this if there is a newer successful monitor.
4634          *       Also, this causes rescheduling only if the history entry
4635          *       has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4636          *       scheduler regression test doesn't, but that may not be a
4637          *       realistic scenario in production).
4638          */
4639         crm_notice("Rescheduling %s-interval %s of %s on %s "
4640                    "after failure expired",
4641                    pcmk__readable_interval(history->interval_ms), history->task,
4642                    history->rsc->id, pcmk__node_name(history->node));
4643         crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4644                     "calculated-failure-timeout");
4645         return pcmk_rc_ok;
4646     }
4647 
4648     return pcmk_rc_undetermined;
4649 }
4650 
4651 /*!
4652  * \internal
4653  * \brief Process a masked probe failure
4654  *
4655  * \param[in,out] history           Parsed action history entry
4656  * \param[in]     orig_exit_status  Action exit status before remapping
4657  * \param[in]     last_failure      Resource's last_failure entry, if known
4658  * \param[in,out] on_fail           Resource's current failure handling
4659  */
4660 static void
4661 mask_probe_failure(struct action_history *history, int orig_exit_status,
4662                    const xmlNode *last_failure,
4663                    enum action_fail_response *on_fail)
4664 {
4665     pcmk_resource_t *ban_rsc = history->rsc;
4666 
4667     if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4668         ban_rsc = uber_parent(history->rsc);
4669     }
4670 
4671     crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4672                services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4673                pcmk__node_name(history->node));
4674     update_resource_state(history, history->expected_exit_status, last_failure,
4675                           on_fail);
4676     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
4677 
4678     record_failed_op(history);
4679     resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4680                       "masked-probe-failure", history->rsc->cluster);
4681 }
4682 
4683 /*!
4684  * \internal
      * \brief Check whether a given failure is for a given pending action
4685  *
4686  * \param[in] history       Parsed history entry for pending action
4687  * \param[in] last_failure  Resource's last_failure entry, if known
4688  *
4689  * \return true if \p last_failure is failure of pending action in \p history,
4690  *         otherwise false
4691  * \note Both \p history and \p last_failure must come from the same
4692  *       \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4693  *       the same.
4694  */
4695 static bool
4696 failure_is_newer(const struct action_history *history,
4697                  const xmlNode *last_failure)
4698 {
4699     guint failure_interval_ms = 0U;
4700     long long failure_change = 0LL;
4701     long long this_change = 0LL;
4702 
4703     if (last_failure == NULL) {
4704         return false; // Resource has no last_failure entry
4705     }
4706 
4707     if (!pcmk__str_eq(history->task,
4708                       crm_element_value(last_failure, PCMK_XA_OPERATION),
4709                       pcmk__str_none)) {
4710         return false; // last_failure is for different action
4711     }
4712 
4713     if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4714                               &failure_interval_ms) != pcmk_ok)
4715         || (history->interval_ms != failure_interval_ms)) {
4716         return false; // last_failure is for action with different interval
4717     }
4718 
4719     if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
4720                        &this_change, 0LL) != pcmk_rc_ok)
4721         || (pcmk__scan_ll(crm_element_value(last_failure,
4722                                             PCMK_XA_LAST_RC_CHANGE),
4723                           &failure_change, 0LL) != pcmk_rc_ok)
4724         || (failure_change < this_change)) {
4725         return false; // Failure is not known to be newer
4726     }
4727 
4728     return true;
4729 }
4730 
4731 /*!
4732  * \internal
4733  * \brief Update a resource's role etc. for a pending action
4734  *
4735  * \param[in,out] history       Parsed history entry for pending action
4736  * \param[in]     last_failure  Resource's last_failure entry, if known
4737  */
4738 static void
4739 process_pending_action(struct action_history *history,
4740                        const xmlNode *last_failure)
4741 {
4742     /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4743      * and there might be a RSC_monitor_INTERVAL entry with the last successful
4744      * or pending result.
4745      *
4746      * If last_failure contains the failure of the pending recurring monitor
4747      * we're processing here, and is newer, the action is no longer pending.
4748      * (Pending results have call ID -1, which sorts last, so the last failure
4749      * if any should be known.)
4750      */
4751     if (failure_is_newer(history, last_failure)) {
4752         return;
4753     }
4754 
4755     if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4756         pcmk__set_rsc_flags(history->rsc, pcmk_rsc_start_pending);
4757         set_active(history->rsc);
4758 
4759     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4760         history->rsc->role = pcmk_role_promoted;
4761 
4762     } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4763                && history->node->details->unclean) {
4764         /* A migrate_to action is pending on an unclean source, so force a stop
4765          * on the target.
4766          */
4767         const char *migrate_target = NULL;
4768         pcmk_node_t *target = NULL;
4769 
4770         migrate_target = crm_element_value(history->xml,
4771                                            PCMK__META_MIGRATE_TARGET);
4772         target = pcmk_find_node(history->rsc->cluster, migrate_target);
4773         if (target != NULL) {
4774             stop_action(history->rsc, target, FALSE);
4775         }
4776     }
4777 
4778     if (history->rsc->pending_task != NULL) {
4779         /* There should never be multiple pending actions, but as a failsafe,
4780          * just remember the first one processed for display purposes.
4781          */
4782         return;
4783     }
4784 
4785     if (pcmk_is_probe(history->task, history->interval_ms)) {
4786         /* Pending probes are currently never displayed, even if pending
4787          * operations are requested. If we ever want to change that,
4788          * enable the below and the corresponding part of
4789          * native.c:native_pending_task().
4790          */
4791 #if 0
4792         history->rsc->pending_task = strdup("probe");
4793         history->rsc->pending_node = history->node;
4794 #endif
4795     } else {
4796         history->rsc->pending_task = strdup(history->task);
4797         history->rsc->pending_node = history->node;
4798     }
4799 }
4800 
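     /*!
      * \internal
      * \brief Unpack a resource action history entry and update resource state
      *
      * \param[in,out] rsc           Resource that history entry is for
      * \param[in,out] node          Node that history entry is for
      * \param[in,out] xml_op        History entry XML to unpack
      * \param[in,out] last_failure  Where resource's last failure entry is tracked
      * \param[in,out] on_fail       Resource's current failure handling
      */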
4801 static void
4802 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4803               xmlNode **last_failure, enum action_fail_response *on_fail)
4804 {
4805     int old_rc = 0;
4806     bool expired = false;
4807     pcmk_resource_t *parent = rsc;
4808     enum rsc_role_e fail_role = pcmk_role_unknown;
4809     enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4810 
4811     struct action_history history = {
4812         .rsc = rsc,
4813         .node = node,
4814         .xml = xml_op,
4815         .execution_status = PCMK_EXEC_UNKNOWN,
4816     };
4817 
4818     CRM_CHECK(rsc && node && xml_op, return);
4819 
4820     history.id = pcmk__xe_id(xml_op);
4821     if (history.id == NULL) {
4822         pcmk__config_err("Ignoring resource history entry for %s on %s "
4823                          "without ID", rsc->id, pcmk__node_name(node));
4824         return;
4825     }
4826 
4827     // Task and interval
4828     history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4829     if (history.task == NULL) {
4830         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4831                          "without " PCMK_XA_OPERATION,
4832                          history.id, rsc->id, pcmk__node_name(node));
4833         return;
4834     }
4835     crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4836     if (!can_affect_state(&history)) {
4837         pcmk__rsc_trace(rsc,
4838                         "Ignoring resource history entry %s for %s on %s "
4839                         "with irrelevant action '%s'",
4840                         history.id, rsc->id, pcmk__node_name(node),
4841                         history.task);
4842         return;
4843     }
4844 
4845     if (unpack_action_result(&history) != pcmk_rc_ok) {
4846         return; // Error already logged
4847     }
4848 
4849     history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4850     history.key = pcmk__xe_history_key(xml_op);
4851     crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4852 
4853     pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4854                     history.id, history.task, history.call_id,
4855                     pcmk__node_name(node),
4856                     pcmk_exec_status_str(history.execution_status),
4857                     crm_exit_str(history.exit_status));
4858 
4859     if (node->details->unclean) {
4860         pcmk__rsc_trace(rsc,
4861                         "%s is running on %s, which is unclean (further action "
4862                         "depends on value of stop's on-fail attribute)",
4863                         rsc->id, pcmk__node_name(node));
4864     }
4865 
4866     expired = check_operation_expiry(&history);
4867     old_rc = history.exit_status;
4868 
4869     remap_operation(&history, on_fail, expired);
4870 
4871     if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4872         goto done;
4873     }
4874 
4875     if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4876         mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4877         goto done;
4878     }
4879 
4880     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4881         parent = uber_parent(rsc);
4882     }
4883 
4884     switch (history.execution_status) {
4885         case PCMK_EXEC_PENDING:
4886             process_pending_action(&history, *last_failure);
4887             goto done;
4888 
4889         case PCMK_EXEC_DONE:
4890             update_resource_state(&history, history.exit_status, *last_failure,
4891                                   on_fail);
4892             goto done;
4893 
4894         case PCMK_EXEC_NOT_INSTALLED:
4895             unpack_failure_handling(&history, &failure_strategy, &fail_role);
4896             if (failure_strategy == pcmk_on_fail_ignore) {
4897                 crm_warn("Cannot ignore failed %s of %s on %s: "
4898                          "Resource agent doesn't exist "
4899                          CRM_XS " status=%d rc=%d id=%s",
4900                          history.task, rsc->id, pcmk__node_name(node),
4901                          history.execution_status, history.exit_status,
4902                          history.id);
4903                 /* This also gets the resource marked as failed later
4904                  * (pcmk_rsc_failed), so that it is displayed as "FAILED"
4905                  */
4906                 *on_fail = pcmk_on_fail_ban;
4907             }
4908             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4909                               "hard-error", rsc->cluster);
4910             unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4911                                   last_failure, on_fail);
4912             goto done;
4913 
4914         case PCMK_EXEC_NOT_CONNECTED:
4915             if (pcmk__is_pacemaker_remote_node(node)
4916                 && pcmk_is_set(node->details->remote_rsc->flags,
4917                                pcmk_rsc_managed)) {
4918                 /* We should never get into a situation where a managed remote
4919                  * connection resource is considered OK but a resource action
4920                  * behind the connection gets a "not connected" status. But as a
4921                  * fail-safe in case a bug or unusual circumstances do lead to
4922                  * that, ensure the remote connection is considered failed.
4923                  */
4924                 pcmk__set_rsc_flags(node->details->remote_rsc,
4925                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
4926             }
4927             break; // Not done, do error handling
4928 
4929         case PCMK_EXEC_ERROR:
4930         case PCMK_EXEC_ERROR_HARD:
4931         case PCMK_EXEC_ERROR_FATAL:
4932         case PCMK_EXEC_TIMEOUT:
4933         case PCMK_EXEC_NOT_SUPPORTED:
4934         case PCMK_EXEC_INVALID:
4935             break; // Not done, do error handling
4936 
4937         default: // No other value should be possible at this point
4938             break;
4939     }
4940 
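         // The action failed: determine the configured response and apply it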
4941     unpack_failure_handling(&history, &failure_strategy, &fail_role);
4942     if ((failure_strategy == pcmk_on_fail_ignore)
4943         || ((failure_strategy == pcmk_on_fail_restart_container)
4944             && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4945 
4946         char *last_change_s = last_change_str(xml_op);
4947 
4948         crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4949                  CRM_XS " %s",
4950                  history.task, services_ocf_exitcode_str(history.exit_status),
4951                  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4952                  pcmk__s(history.exit_reason, ""), rsc->id,
4953                  pcmk__node_name(node), last_change_s, history.id);
4954         free(last_change_s);
4955 
4956         update_resource_state(&history, history.expected_exit_status,
4957                               *last_failure, on_fail);
4958         crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
4959         pcmk__set_rsc_flags(rsc, pcmk_rsc_ignore_failure);
4960 
4961         record_failed_op(&history);
4962 
4963         if ((failure_strategy == pcmk_on_fail_restart_container)
4964             && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4965             *on_fail = failure_strategy;
4966         }
4967 
4968     } else {
4969         unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4970                               last_failure, on_fail);
4971 
4972         if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4973             uint8_t log_level = LOG_ERR;
4974 
4975             if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4976                 log_level = LOG_NOTICE;
4977             }
4978             do_crm_log(log_level,
4979                        "Preventing %s from restarting on %s because "
4980                        "of hard failure (%s%s%s) " CRM_XS " %s",
4981                        parent->id, pcmk__node_name(node),
4982                        services_ocf_exitcode_str(history.exit_status),
4983                        (pcmk__str_empty(history.exit_reason)? "" : ": "),
4984                        pcmk__s(history.exit_reason, ""), history.id);
4985             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4986                               "hard-error", rsc->cluster);
4987 
4988         } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4989             pcmk__sched_err("Preventing %s from restarting anywhere because "
4990                             "of fatal failure (%s%s%s) " CRM_XS " %s",
4991                             parent->id,
4992                             services_ocf_exitcode_str(history.exit_status),
4993                             (pcmk__str_empty(history.exit_reason)? "" : ": "),
4994                             pcmk__s(history.exit_reason, ""), history.id);
4995             resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
4996                               "fatal-error", rsc->cluster);
4997         }
4998     }
4999 
5000 done:
5001     pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
5002                     rsc->id, pcmk__node_name(node), history.id,
5003                     pcmk_role_text(rsc->role),
5004                     pcmk_role_text(rsc->next_role));
5005 }
5006 
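     /*!
      * \internal
      * \brief Add built-in and configured attributes to a node's attribute tables
      *
      * \param[in]     xml_obj    Node's XML from the configuration
      * \param[in,out] node       Node to add attributes to
      * \param[in]     overwrite  Whether instance attributes may overwrite
      *                           existing values
      * \param[in,out] scheduler  Scheduler data
      */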
5007 static void
5008 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
5009                pcmk_scheduler_t *scheduler)
5010 {
5011     const char *cluster_name = NULL;
5012 
5013     pe_rule_eval_data_t rule_data = {
5014         .node_hash = NULL,
5015         .now = scheduler->now,
5016         .match_data = NULL,
5017         .rsc_data = NULL,
5018         .op_data = NULL
5019     };
5020 
5021     pcmk__insert_dup(node->details->attrs,
5022                      CRM_ATTR_UNAME, node->details->uname);
5023 
5024     pcmk__insert_dup(node->details->attrs, CRM_ATTR_ID, node->details->id);
5025     if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
5026         scheduler->dc_node = node;
5027         node->details->is_dc = TRUE;
5028         pcmk__insert_dup(node->details->attrs,
5029                          CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
5030     } else {
5031         pcmk__insert_dup(node->details->attrs,
5032                          CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
5033     }
5034 
5035     cluster_name = g_hash_table_lookup(scheduler->config_hash,
5036                                        PCMK_OPT_CLUSTER_NAME);
5037     if (cluster_name) {
5038         pcmk__insert_dup(node->details->attrs, CRM_ATTR_CLUSTER_NAME,
5039                          cluster_name);
5040     }
5041 
5042     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data,
5043                                node->details->attrs, NULL, overwrite,
5044                                scheduler);
5045 
5046     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
5047                                node->details->utilization, NULL,
5048                                FALSE, scheduler);
5049 
5050     if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5051                         pcmk__rsc_node_current) == NULL) {
5052         const char *site_name = pcmk__node_attr(node, "site-name", NULL,
5053                                                 pcmk__rsc_node_current);
5054 
5055         if (site_name) {
5056             pcmk__insert_dup(node->details->attrs,
5057                              CRM_ATTR_SITE_NAME, site_name);
5058 
5059         } else if (cluster_name) {
5060             /* Default to cluster-name if unset */
5061             pcmk__insert_dup(node->details->attrs,
5062                              CRM_ATTR_SITE_NAME, cluster_name);
5063         }
5064     }
5065 }
5066 
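     /*!
      * \internal
      * \brief Extract a resource's action history entries from its status XML
      *
      * \param[in]     node           Name of node that history is for
      * \param[in]     rsc            ID of resource that history is for
      * \param[in,out] rsc_entry      Resource's \c PCMK__XE_LRM_RESOURCE XML
      * \param[in]     active_filter  If true, return only entries from the
      *                               resource's most recent start onward
      *
      * \return List of history entry XML sorted by call ID (or NULL if none)
      * \note The node and resource names are added to each returned entry as
      *       attributes.
      */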
5067 static GList *
5068 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5069 {
5070     int counter = -1;
5071     int stop_index = -1;
5072     int start_index = -1;
5073 
5074     xmlNode *rsc_op = NULL;
5075 
5076     GList *gIter = NULL;
5077     GList *op_list = NULL;
5078     GList *sorted_op_list = NULL;
5079 
5080     /* extract operations */
5081     op_list = NULL;
5082     sorted_op_list = NULL;
5083 
5084     for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
5085          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5086 
5087         if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5088             crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5089             crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5090             op_list = g_list_prepend(op_list, rsc_op);
5091         }
5092     }
5093 
5094     if (op_list == NULL) {
5095         /* if there are no operations, there is nothing to do */
5096         return NULL;
5097     }
5098 
5099     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5100 
5101     /* create active recurring operations as optional */
5102     if (active_filter == FALSE) {
5103         return sorted_op_list;
5104     }
5105 
5106     op_list = NULL;
5107 
5108     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5109 
5110     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5111         xmlNode *rsc_op = (xmlNode *) gIter->data;
5112 
5113         counter++;
5114 
5115         if (start_index < stop_index) {
5116             crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5117             break;
5118 
5119         } else if (counter < start_index) {
5120             crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5121             continue;
5122         }
5123         op_list = g_list_append(op_list, rsc_op);
5124     }
5125 
5126     g_list_free(sorted_op_list);
5127     return op_list;
5128 }
5129 
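/*!
 * \internal
 * \brief Find operation history entries in scheduler input XML
 *
 * Scan each node_state entry in the input's status section. After determining
 * the node's online status, collect its resource history entries if the node
 * is online or fencing is enabled, optionally restricted to a single resource
 * and/or node and to active operations only.
 *
 * \param[in]     rsc            If not NULL, include only this resource's history
 * \param[in]     node           If not NULL, include only this node's history
 * \param[in]     active_filter  Whether to include only active operations
 * \param[in,out] scheduler      Scheduler data
 *
 * \return List of history entry XML (caller must free the list with
 *         g_list_free(), but not the XML it points to)
 */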
5130 GList *
5131 find_operations(const char *rsc, const char *node, gboolean active_filter,
5132                 pcmk_scheduler_t *scheduler)
5133 {
5134     GList *output = NULL;
5135     GList *intermediate = NULL;
5136 
5137     xmlNode *tmp = NULL;
5138     xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5139                                            NULL, NULL);
5140 
5141     pcmk_node_t *this_node = NULL;
5142 
5143     xmlNode *node_state = NULL;
5144 
5145     CRM_CHECK(status != NULL, return NULL);
5146 
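    /* Check the history in each node_state entry of the status section */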
5147     for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
5148          node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5149 
5150         if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
5151             const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5152 
5153             if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5154                 continue;
5155             }
5156 
5157             this_node = pcmk_find_node(scheduler, uname);
5158             if (this_node == NULL) {
5159                 CRM_LOG_ASSERT(this_node != NULL);
5160                 continue;
5161 
5162             } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5163                 determine_remote_online_status(scheduler, this_node);
5164 
5165             } else {
5166                 determine_online_status(node_state, this_node, scheduler);
5167             }
5168 
5169             if (this_node->details->online
5170                 || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
5171                 /* Offline nodes run no resources, unless fencing is
5172                  * enabled, in which case we need to make sure resource
5173                  * start events happen after the fencing
5174                  */
5175                 xmlNode *lrm_rsc = NULL;
5176 
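                /* Find this node's resource history in its lrm XML */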
5177                 tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5178                                            NULL);
5179                 tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
5180                                            NULL);
5181 
5182                 for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
5183                      lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5184 
5185                     if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5186                         const char *rsc_id = crm_element_value(lrm_rsc,
5187                                                                PCMK_XA_ID);
5188 
5189                         if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5190                             continue;
5191                         }
5192 
5193                         intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5194                         output = g_list_concat(output, intermediate);
5195                     }
5196                 }
5197             }
5198         }
5199     }
5200 
5201     return output;
5202 }
