root/lib/pengine/unpack.c


DEFINITIONS

This source file includes the following definitions.
  1. is_dangling_guest_node
  2. pe_fence_node
  3. set_if_xpath
  4. unpack_config
  5. pe_create_node
  6. expand_remote_rsc_meta
  7. handle_startup_fencing
  8. unpack_nodes
  9. setup_container
  10. unpack_remote_nodes
  11. link_rsc2remotenode
  12. destroy_tag
  13. unpack_resources
  14. pcmk__unpack_fencing_topology
  15. unpack_tags
  16. unpack_ticket_state
  17. unpack_tickets_state
  18. unpack_handle_remote_attrs
  19. unpack_transient_attributes
  20. unpack_node_state
  21. unpack_node_history
  22. unpack_status
  23. unpack_node_member
  24. unpack_node_online
  25. unpack_node_terminate
  26. determine_online_status_no_fencing
  27. pending_too_long
  28. determine_online_status_fencing
  29. determine_remote_online_status
  30. determine_online_status
  31. pe_base_name_end
  32. clone_strip
  33. clone_zero
  34. create_fake_resource
  35. create_anonymous_orphan
  36. find_anonymous_clone
  37. unpack_find_resource
  38. process_orphan_resource
  39. process_rsc_state
  40. process_recurring
  41. calculate_active_ops
  42. unpack_shutdown_lock
  43. unpack_lrm_resource
  44. handle_orphaned_container_fillers
  45. unpack_node_lrm
  46. set_active
  47. set_node_score
  48. find_lrm_op
  49. find_lrm_resource
  50. unknown_on_node
  51. monitor_not_running_after
  52. non_monitor_after
  53. newer_state_after_migrate
  54. get_migration_node_names
  55. add_dangling_migration
  56. unpack_migrate_to_success
  57. unpack_migrate_to_failure
  58. unpack_migrate_from_failure
  59. record_failed_op
  60. last_change_str
  61. cmp_on_fail
  62. ban_from_all_nodes
  63. unpack_failure_handling
  64. unpack_rsc_op_failure
  65. block_if_unrecoverable
  66. remap_because
  67. remap_operation
  68. should_clear_for_param_change
  69. order_after_remote_fencing
  70. should_ignore_failure_timeout
  71. check_operation_expiry
  72. pe__target_rc_from_xml
  73. update_resource_state
  74. can_affect_state
  75. unpack_action_result
  76. process_expired_result
  77. mask_probe_failure
  78. failure_is_newer
  79. process_pending_action
  80. unpack_rsc_op
  81. add_node_attrs
  82. extract_operations
  83. find_operations

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdio.h>
  13 #include <string.h>
  14 #include <glib.h>
  15 #include <time.h>
  16 
  17 #include <crm/crm.h>
  18 #include <crm/services.h>
  19 #include <crm/common/xml.h>
  20 #include <crm/common/xml_internal.h>
  21 
  22 #include <crm/common/util.h>
  23 #include <crm/pengine/rules.h>
  24 #include <crm/pengine/internal.h>
  25 #include <pe_status_private.h>
  26 
  27 CRM_TRACE_INIT_DATA(pe_status);
  28 
  29 // A (parsed) resource action history entry
  30 struct action_history {
  31     pcmk_resource_t *rsc;       // Resource that history is for
   32     pcmk_node_t *node;          // Node that history is for
   33     xmlNode *xml;               // History entry XML
  34 
  35     // Parsed from entry XML
  36     const char *id;           // XML ID of history entry
  37     const char *key;          // Operation key of action
  38     const char *task;         // Action name
  39     const char *exit_reason;  // Exit reason given for result
  40     guint interval_ms;        // Action interval
  41     int call_id;              // Call ID of action
  42     int expected_exit_status; // Expected exit status of action
  43     int exit_status;          // Actual exit status of action
  44     int execution_status;     // Execution status of action
  45 };
  46 
  47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
  48  * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
  49  * flag is stringified more readably in log messages.
  50  */
  51 #define set_config_flag(scheduler, option, flag) do {                         \
  52         GHashTable *config_hash = (scheduler)->config_hash;                   \
  53         const char *scf_value = pcmk__cluster_option(config_hash, (option));  \
  54                                                                               \
  55         if (scf_value != NULL) {                                              \
  56             if (crm_is_true(scf_value)) {                                     \
  57                 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,   \
  58                                     LOG_TRACE, "Scheduler",                   \
  59                                     crm_system_name, (scheduler)->flags,      \
  60                                     (flag), #flag);                           \
  61             } else {                                                          \
  62                 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
  63                                     LOG_TRACE, "Scheduler",                   \
  64                                     crm_system_name, (scheduler)->flags,      \
  65                                     (flag), #flag);                           \
  66             }                                                                 \
  67         }                                                                     \
  68     } while(0)
  69 
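      /* For illustration, unpack_config() below applies this macro to boolean
       * cluster options, for example:
       *
       *   set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
       *                   pcmk_sched_in_maintenance);
       *
       * which sets or clears pcmk_sched_in_maintenance in scheduler->flags
       * according to the option's value in config_hash.
       */
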
  70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
  71                           xmlNode *xml_op, xmlNode **last_failure,
  72                           enum action_fail_response *failed);
  73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
  74                                            pcmk_node_t *this_node);
  75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
  76                            bool overwrite, pcmk_scheduler_t *scheduler);
  77 static void determine_online_status(const xmlNode *node_state,
  78                                     pcmk_node_t *this_node,
  79                                     pcmk_scheduler_t *scheduler);
  80 
  81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
  82                             pcmk_scheduler_t *scheduler);
  83 
  84 
  85 static gboolean
  86 is_dangling_guest_node(pcmk_node_t *node)
  87 {
  88     /* we are looking for a remote-node that was supposed to be mapped to a
  89      * container resource, but all traces of that container have disappeared 
  90      * from both the config and the status section. */
  91     if (pcmk__is_pacemaker_remote_node(node)
  92         && (node->details->remote_rsc != NULL)
  93         && (node->details->remote_rsc->container == NULL)
  94         && pcmk_is_set(node->details->remote_rsc->flags,
  95                        pcmk_rsc_removed_filler)) {
  96         return TRUE;
  97     }
  98 
  99     return FALSE;
 100 }
 101 
 102 /*!
 103  * \brief Schedule a fence action for a node
 104  *
 105  * \param[in,out] scheduler       Scheduler data
 106  * \param[in,out] node            Node to fence
 107  * \param[in]     reason          Text description of why fencing is needed
 108  * \param[in]     priority_delay  Whether to consider
 109  *                                \c PCMK_OPT_PRIORITY_FENCING_DELAY
 110  */
 111 void
 112 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
 113               const char *reason, bool priority_delay)
 114 {
 115     CRM_CHECK(node, return);
 116 
 117     /* A guest node is fenced by marking its container as failed */
 118     if (pcmk__is_guest_or_bundle_node(node)) {
 119         pcmk_resource_t *rsc = node->details->remote_rsc->container;
 120 
 121         if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 122             if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 123                 crm_notice("Not fencing guest node %s "
 124                            "(otherwise would because %s): "
 125                            "its guest resource %s is unmanaged",
 126                            pcmk__node_name(node), reason, rsc->id);
 127             } else {
 128                 pcmk__sched_warn("Guest node %s will be fenced "
 129                                  "(by recovering its guest resource %s): %s",
 130                                  pcmk__node_name(node), rsc->id, reason);
 131 
 132                 /* We don't mark the node as unclean because that would prevent the
 133                  * node from running resources. We want to allow it to run resources
 134                  * in this transition if the recovery succeeds.
 135                  */
 136                 node->details->remote_requires_reset = TRUE;
 137                 pcmk__set_rsc_flags(rsc,
 138                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 139             }
 140         }
 141 
 142     } else if (is_dangling_guest_node(node)) {
 143         crm_info("Cleaning up dangling connection for guest node %s: "
 144                  "fencing was already done because %s, "
 145                  "and guest resource no longer exists",
 146                  pcmk__node_name(node), reason);
 147         pcmk__set_rsc_flags(node->details->remote_rsc,
 148                             pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 149 
 150     } else if (pcmk__is_remote_node(node)) {
 151         pcmk_resource_t *rsc = node->details->remote_rsc;
 152 
 153         if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 154             crm_notice("Not fencing remote node %s "
 155                        "(otherwise would because %s): connection is unmanaged",
 156                        pcmk__node_name(node), reason);
  157         } else if (node->details->remote_requires_reset == FALSE) {
 158             node->details->remote_requires_reset = TRUE;
 159             pcmk__sched_warn("Remote node %s %s: %s",
 160                              pcmk__node_name(node),
 161                              pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 162                              reason);
 163         }
 164         node->details->unclean = TRUE;
 165         // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
 166         pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
 167 
 168     } else if (node->details->unclean) {
 169         crm_trace("Cluster node %s %s because %s",
 170                   pcmk__node_name(node),
 171                   pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
 172                   reason);
 173 
 174     } else {
 175         pcmk__sched_warn("Cluster node %s %s: %s",
 176                          pcmk__node_name(node),
 177                          pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
 178                          reason);
 179         node->details->unclean = TRUE;
 180         pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
 181     }
 182 }
 183 
 184 // @TODO xpaths can't handle templates, rules, or id-refs
 185 
 186 // nvpair with provides or requires set to unfencing
 187 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR           \
 188     "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'"   \
 189     "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "   \
 190     "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
 191 
 192 // unfencing in rsc_defaults or any resource
 193 #define XPATH_ENABLE_UNFENCING \
 194     "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
 195     "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR             \
 196     "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
 197     "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
 198 
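      /* For illustration, XPATH_UNFENCING_NVPAIR matches meta-attribute
       * entries such as (IDs hypothetical):
       *
       *   <nvpair id="rsc1-requires" name="requires" value="unfencing"/>
       *   <nvpair id="defaults-provides" name="provides" value="unfencing"/>
       *
       * whether they appear in a resource's meta-attributes or in
       * rsc_defaults.
       */
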
 199 static void
 200 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
 201 {
 202     xmlXPathObjectPtr result = NULL;
 203 
 204     if (!pcmk_is_set(scheduler->flags, flag)) {
 205         result = xpath_search(scheduler->input, xpath);
 206         if (result && (numXpathResults(result) > 0)) {
 207             pcmk__set_scheduler_flags(scheduler, flag);
 208         }
 209         freeXpathObject(result);
 210     }
 211 }
 212 
 213 gboolean
 214 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
 215 {
 216     const char *value = NULL;
 217     guint interval_ms = 0U;
 218     GHashTable *config_hash = pcmk__strkey_table(free, free);
 219 
 220     pe_rule_eval_data_t rule_data = {
 221         .node_hash = NULL,
 222         .now = scheduler->now,
 223         .match_data = NULL,
 224         .rsc_data = NULL,
 225         .op_data = NULL
 226     };
 227 
 228     scheduler->config_hash = config_hash;
 229 
 230     pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
 231                                config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
 232                                FALSE, scheduler);
 233 
 234     pcmk__validate_cluster_options(config_hash);
 235 
 236     set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
 237                     pcmk_sched_probe_resources);
 238     if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
 239         crm_info("Startup probes: disabled (dangerous)");
 240     }
 241 
 242     value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
 243     if (value && crm_is_true(value)) {
 244         crm_info("Watchdog-based self-fencing will be performed via SBD if "
 245                  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
 246                  " is nonzero");
 247         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_fencing);
 248     }
 249 
 250     /* Set certain flags via xpath here, so they can be used before the relevant
 251      * configuration sections are unpacked.
 252      */
 253     set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
 254                  scheduler);
 255 
 256     value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
 257     pcmk_parse_interval_spec(value, &interval_ms);
 258 
 259     if (interval_ms >= INT_MAX) {
 260         scheduler->stonith_timeout = INT_MAX;
 261     } else {
 262         scheduler->stonith_timeout = (int) interval_ms;
 263     }
 264     crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
 265 
 266     set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
 267                     pcmk_sched_fencing_enabled);
 268     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 269         crm_debug("STONITH of failed nodes is enabled");
 270     } else {
 271         crm_debug("STONITH of failed nodes is disabled");
 272     }
 273 
 274     scheduler->stonith_action = pcmk__cluster_option(config_hash,
 275                                                      PCMK_OPT_STONITH_ACTION);
 276     if (!strcmp(scheduler->stonith_action, PCMK__ACTION_POWEROFF)) {
 277         pcmk__warn_once(pcmk__wo_poweroff,
 278                         "Support for " PCMK_OPT_STONITH_ACTION " of "
 279                         "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
 280                         "removed in a future release "
 281                         "(use '" PCMK_ACTION_OFF "' instead)");
 282         scheduler->stonith_action = PCMK_ACTION_OFF;
 283     }
 284     crm_trace("STONITH will %s nodes", scheduler->stonith_action);
 285 
 286     set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
 287                     pcmk_sched_concurrent_fencing);
 288     if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
 289         crm_debug("Concurrent fencing is enabled");
 290     } else {
 291         crm_debug("Concurrent fencing is disabled");
 292     }
 293 
 294     value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
 295     if (value) {
 296         pcmk_parse_interval_spec(value, &interval_ms);
 297         scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
 298         crm_trace("Priority fencing delay is %ds",
 299                   scheduler->priority_fencing_delay);
 300     }
 301 
 302     set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
 303                     pcmk_sched_stop_all);
 304     crm_debug("Stop all active resources: %s",
 305               pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
 306 
 307     set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
 308                     pcmk_sched_symmetric_cluster);
 309     if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
  310         crm_debug("Cluster is symmetric - resources can run anywhere by default");
 311     }
 312 
 313     value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
 314 
 315     if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
 316         scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
 317 
 318     } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
 319         scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
 320 
 321     } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
 322         scheduler->no_quorum_policy = pcmk_no_quorum_demote;
 323 
 324     } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
 325                                     PCMK_VALUE_FENCE_LEGACY, NULL)) {
 326         if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 327             int do_panic = 0;
 328 
 329             crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
 330                                   &do_panic);
 331             if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
 332                 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
 333             } else {
 334                 crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 335                            " to 'stop': cluster has never had quorum");
 336                 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 337             }
 338         } else {
 339             pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
 340                              " to 'stop' because fencing is disabled");
 341             scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 342         }
 343 
 344     } else {
 345         scheduler->no_quorum_policy = pcmk_no_quorum_stop;
 346     }
 347 
 348     switch (scheduler->no_quorum_policy) {
 349         case pcmk_no_quorum_freeze:
 350             crm_debug("On loss of quorum: Freeze resources");
 351             break;
 352         case pcmk_no_quorum_stop:
 353             crm_debug("On loss of quorum: Stop ALL resources");
 354             break;
 355         case pcmk_no_quorum_demote:
 356             crm_debug("On loss of quorum: "
 357                       "Demote promotable resources and stop other resources");
 358             break;
 359         case pcmk_no_quorum_fence:
 360             crm_notice("On loss of quorum: Fence all remaining nodes");
 361             break;
 362         case pcmk_no_quorum_ignore:
 363             crm_notice("On loss of quorum: Ignore");
 364             break;
 365     }
 366 
 367     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
 368                     pcmk_sched_stop_removed_resources);
 369     if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
 370         crm_trace("Orphan resources are stopped");
 371     } else {
 372         crm_trace("Orphan resources are ignored");
 373     }
 374 
 375     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
 376                     pcmk_sched_cancel_removed_actions);
 377     if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
 378         crm_trace("Orphan resource actions are stopped");
 379     } else {
 380         crm_trace("Orphan resource actions are ignored");
 381     }
 382 
 383     value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
 384     if (value != NULL) {
 385         if (crm_is_true(value)) {
 386             pcmk__set_scheduler_flags(scheduler, pcmk_sched_remove_after_stop);
 387             pcmk__warn_once(pcmk__wo_remove_after,
 388                             "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
 389                             " cluster property is deprecated and will be "
 390                             "removed in a future release");
 391         } else {
 392             pcmk__clear_scheduler_flags(scheduler,
 393                                         pcmk_sched_remove_after_stop);
 394         }
 395     }
 396 
 397     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 398                     pcmk_sched_in_maintenance);
 399     crm_trace("Maintenance mode: %s",
 400               pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
 401 
 402     set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
 403                     pcmk_sched_start_failure_fatal);
 404     if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
 405         crm_trace("Start failures are always fatal");
 406     } else {
 407         crm_trace("Start failures are handled by failcount");
 408     }
 409 
 410     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 411         set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
 412                         pcmk_sched_startup_fencing);
 413     }
 414     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
 415         crm_trace("Unseen nodes will be fenced");
 416     } else {
 417         pcmk__warn_once(pcmk__wo_blind,
 418                         "Blind faith: not fencing unseen nodes");
 419     }
 420 
 421     pe__unpack_node_health_scores(scheduler);
 422 
 423     scheduler->placement_strategy =
 424         pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
 425     crm_trace("Placement strategy: %s", scheduler->placement_strategy);
 426 
 427     set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
 428                     pcmk_sched_shutdown_lock);
 429     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
 430         value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
 431         pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock));
 432         scheduler->shutdown_lock /= 1000;
 433         crm_trace("Resources will be locked to nodes that were cleanly "
 434                   "shut down (locks expire after %s)",
 435                   pcmk__readable_interval(scheduler->shutdown_lock));
 436     } else {
 437         crm_trace("Resources will not be locked to nodes that were cleanly "
 438                   "shut down");
 439     }
 440 
 441     value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
 442     pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout));
 443     scheduler->node_pending_timeout /= 1000;
 444     if (scheduler->node_pending_timeout == 0) {
 445         crm_trace("Do not fence pending nodes");
 446     } else {
 447         crm_trace("Fence pending nodes after %s",
 448                   pcmk__readable_interval(scheduler->node_pending_timeout
 449                                           * 1000));
 450     }
 451 
 452     return TRUE;
 453 }
 454 
 455 /*!
 456  * \internal
 457  * \brief Create a new node object in scheduler data
 458  *
 459  * \param[in]     id         ID of new node
 460  * \param[in]     uname      Name of new node
 461  * \param[in]     type       Type of new node
 462  * \param[in]     score      Score of new node
 463  * \param[in,out] scheduler  Scheduler data
 464  *
 465  * \return Newly created node object
 466  * \note The returned object is part of the scheduler data and should not be
 467  *       freed separately.
 468  */
 469 pcmk_node_t *
 470 pe_create_node(const char *id, const char *uname, const char *type,
 471                int score, pcmk_scheduler_t *scheduler)
 472 {
 473     pcmk_node_t *new_node = NULL;
 474 
 475     if (pcmk_find_node(scheduler, uname) != NULL) {
 476         pcmk__config_warn("More than one node entry has name '%s'", uname);
 477     }
 478 
 479     new_node = calloc(1, sizeof(pcmk_node_t));
 480     if (new_node == NULL) {
 481         pcmk__sched_err("Could not allocate memory for node %s", uname);
 482         return NULL;
 483     }
 484 
 485     new_node->weight = score;
 486     new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
 487 
 488     if (new_node->details == NULL) {
 489         free(new_node);
 490         pcmk__sched_err("Could not allocate memory for node %s", uname);
 491         return NULL;
 492     }
 493 
 494     crm_trace("Creating node for entry %s/%s", uname, id);
 495     new_node->details->id = id;
 496     new_node->details->uname = uname;
 497     new_node->details->online = FALSE;
 498     new_node->details->shutdown = FALSE;
 499     new_node->details->rsc_discovery_enabled = TRUE;
 500     new_node->details->running_rsc = NULL;
 501     new_node->details->data_set = scheduler;
 502 
 503     if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
 504                      pcmk__str_null_matches|pcmk__str_casei)) {
 505         new_node->details->type = pcmk_node_variant_cluster;
 506 
 507     } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
 508         new_node->details->type = pcmk_node_variant_remote;
 509         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_remote_nodes);
 510 
 511     } else {
 512         /* @COMPAT 'ping' is the default for backward compatibility, but it
 513          * should be changed to 'member' at a compatibility break
 514          */
 515         if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
 516             pcmk__config_warn("Node %s has unrecognized type '%s', "
 517                               "assuming '" PCMK__VALUE_PING "'",
 518                               pcmk__s(uname, "without name"), type);
 519         }
 520         pcmk__warn_once(pcmk__wo_ping_node,
 521                         "Support for nodes of type '" PCMK__VALUE_PING "' "
 522                         "(such as %s) is deprecated and will be removed in a "
 523                         "future release",
 524                         pcmk__s(uname, "unnamed node"));
 525         new_node->details->type = node_ping;
 526     }
 527 
 528     new_node->details->attrs = pcmk__strkey_table(free, free);
 529 
 530     if (pcmk__is_pacemaker_remote_node(new_node)) {
 531         pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
 532     } else {
 533         pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
 534     }
 535 
 536     new_node->details->utilization = pcmk__strkey_table(free, free);
 537     new_node->details->digest_cache = pcmk__strkey_table(free,
 538                                                           pe__free_digests);
 539 
 540     scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
 541                                             pe__cmp_node_name);
 542     return new_node;
 543 }
 544 
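      /* For illustration, expand_remote_rsc_meta() below recognizes guest-node
       * requests expressed as meta-attributes of a primitive, such as
       * (resource name, node name, and address hypothetical):
       *
       *   <primitive id="vm1" class="ocf" provider="heartbeat"
       *              type="VirtualDomain">
       *     <meta_attributes id="vm1-meta">
       *       <nvpair id="vm1-nodename" name="remote-node" value="guest1"/>
       *       <nvpair id="vm1-addr" name="remote-addr" value="192.168.122.10"/>
       *     </meta_attributes>
       *   </primitive>
       *
       * and injects an ocf:pacemaker:remote connection primitive named
       * "guest1" into the configuration for later unpacking.
       */
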
 545 static const char *
 546 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
 547 {
 548     xmlNode *attr_set = NULL;
 549     xmlNode *attr = NULL;
 550 
 551     const char *container_id = pcmk__xe_id(xml_obj);
 552     const char *remote_name = NULL;
 553     const char *remote_server = NULL;
 554     const char *remote_port = NULL;
 555     const char *connect_timeout = "60s";
  556     const char *remote_allow_migrate = NULL;
 557     const char *is_managed = NULL;
 558 
 559     for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
 560          attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
 561 
 562         if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
 563             continue;
 564         }
 565 
 566         for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
 567              attr != NULL; attr = pcmk__xe_next(attr)) {
 568 
 569             const char *value = crm_element_value(attr, PCMK_XA_VALUE);
 570             const char *name = crm_element_value(attr, PCMK_XA_NAME);
 571 
 572             if (name == NULL) { // Sanity
 573                 continue;
 574             }
 575 
 576             if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
 577                 remote_name = value;
 578 
 579             } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
 580                 remote_server = value;
 581 
 582             } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
 583                 remote_port = value;
 584 
 585             } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
 586                 connect_timeout = value;
 587 
 588             } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
 589                 remote_allow_migrate = value;
 590 
 591             } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
 592                 is_managed = value;
 593             }
 594         }
 595     }
 596 
 597     if (remote_name == NULL) {
 598         return NULL;
 599     }
 600 
 601     if (pe_find_resource(data->resources, remote_name) != NULL) {
 602         return NULL;
 603     }
 604 
 605     pe_create_remote_xml(parent, remote_name, container_id,
 606                          remote_allow_migrate, is_managed,
 607                          connect_timeout, remote_server, remote_port);
 608     return remote_name;
 609 }
 610 
 611 static void
 612 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
 613 {
 614     if ((new_node->details->type == pcmk_node_variant_remote)
 615         && (new_node->details->remote_rsc == NULL)) {
 616         /* Ignore fencing for remote nodes that don't have a connection resource
 617          * associated with them. This happens when remote node entries get left
 618          * in the nodes section after the connection resource is removed.
 619          */
 620         return;
 621     }
 622 
 623     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
 624         // All nodes are unclean until we've seen their status entry
 625         new_node->details->unclean = TRUE;
 626 
 627     } else {
 628         // Blind faith ...
 629         new_node->details->unclean = FALSE;
 630     }
 631 
 632     /* We need to be able to determine if a node's status section
 633      * exists or not separate from whether the node is unclean. */
 634     new_node->details->unseen = TRUE;
 635 }
 636 
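      /* For illustration, unpack_nodes() below processes configuration entries
       * such as (ID and name hypothetical):
       *
       *   <node id="1" uname="node1" type="member"/>
       *
       * where type may also be "remote" (or the deprecated "ping") and an
       * optional score attribute may be given.
       */
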
 637 gboolean
 638 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
 639 {
 640     xmlNode *xml_obj = NULL;
 641     pcmk_node_t *new_node = NULL;
 642     const char *id = NULL;
 643     const char *uname = NULL;
 644     const char *type = NULL;
 645 
 646     for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
 647          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
 648 
 649         if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
 650             int score = 0;
 651             int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
 652 
 653             new_node = NULL;
 654 
 655             id = crm_element_value(xml_obj, PCMK_XA_ID);
 656             uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
 657             type = crm_element_value(xml_obj, PCMK_XA_TYPE);
 658             crm_trace("Processing node %s/%s", uname, id);
 659 
 660             if (id == NULL) {
 661                 pcmk__config_err("Ignoring <" PCMK_XE_NODE
 662                                  "> entry in configuration without id");
 663                 continue;
 664             }
 665             if (rc != pcmk_rc_ok) {
 666                 // Not possible with schema validation enabled
 667                 pcmk__config_warn("Using 0 as score for node %s "
 668                                   "because '%s' is not a valid score: %s",
 669                                   pcmk__s(uname, "without name"),
 670                                   crm_element_value(xml_obj, PCMK_XA_SCORE),
 671                                   pcmk_rc_str(rc));
 672             }
 673             new_node = pe_create_node(id, uname, type, score, scheduler);
 674 
 675             if (new_node == NULL) {
 676                 return FALSE;
 677             }
 678 
 679             handle_startup_fencing(scheduler, new_node);
 680 
 681             add_node_attrs(xml_obj, new_node, FALSE, scheduler);
 682 
 683             crm_trace("Done with node %s",
 684                       crm_element_value(xml_obj, PCMK_XA_UNAME));
 685         }
 686     }
 687 
 688     if (scheduler->localhost
 689         && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) {
 690         crm_info("Creating a fake local node");
 691         pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
 692                        scheduler);
 693     }
 694 
 695     return TRUE;
 696 }
 697 
 698 static void
 699 setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
 700 {
 701     const char *container_id = NULL;
 702 
 703     if (rsc->children) {
 704         g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
 705         return;
 706     }
 707 
 708     container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
 709     if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
 710         pcmk_resource_t *container = pe_find_resource(scheduler->resources,
 711                                                       container_id);
 712 
 713         if (container) {
 714             rsc->container = container;
 715             pcmk__set_rsc_flags(container, pcmk_rsc_has_filler);
 716             container->fillers = g_list_append(container->fillers, rsc);
 717             pcmk__rsc_trace(rsc, "Resource %s's container is %s",
 718                             rsc->id, container_id);
 719         } else {
 720             pcmk__config_err("Resource %s: Unknown resource container (%s)",
 721                              rsc->id, container_id);
 722         }
 723     }
 724 }
 725 
 726 gboolean
 727 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 728 {
 729     xmlNode *xml_obj = NULL;
 730 
 731     /* Create remote nodes and guest nodes from the resource configuration
 732      * before unpacking resources.
 733      */
 734     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 735          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
 736 
 737         const char *new_node_id = NULL;
 738 
 739         /* Check for remote nodes, which are defined by ocf:pacemaker:remote
 740          * primitives.
 741          */
 742         if (xml_contains_remote_node(xml_obj)) {
 743             new_node_id = pcmk__xe_id(xml_obj);
 744             /* The pcmk_find_node() check ensures we don't iterate over an
 745              * expanded node that has already been added to the node list
 746              */
 747             if (new_node_id
 748                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 749                 crm_trace("Found remote node %s defined by resource %s",
 750                           new_node_id, pcmk__xe_id(xml_obj));
 751                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 752                                0, scheduler);
 753             }
 754             continue;
 755         }
 756 
 757         /* Check for guest nodes, which are defined by special meta-attributes
 758          * of a primitive of any type (for example, VirtualDomain or Xen).
 759          */
 760         if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
 761             /* This will add an ocf:pacemaker:remote primitive to the
 762              * configuration for the guest node's connection, to be unpacked
 763              * later.
 764              */
 765             new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
 766                                                  scheduler);
 767             if (new_node_id
 768                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 769                 crm_trace("Found guest node %s in resource %s",
 770                           new_node_id, pcmk__xe_id(xml_obj));
 771                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 772                                0, scheduler);
 773             }
 774             continue;
 775         }
 776 
 777         /* Check for guest nodes inside a group. Clones are currently not
 778          * supported as guest nodes.
 779          */
 780         if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
 781             xmlNode *xml_obj2 = NULL;
 782             for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
 783                  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
 784 
 785                 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
 786                                                      scheduler);
 787 
 788                 if (new_node_id
 789                     && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
 790                     crm_trace("Found guest node %s in resource %s inside group %s",
 791                               new_node_id, pcmk__xe_id(xml_obj2),
 792                               pcmk__xe_id(xml_obj));
 793                     pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
 794                                    0, scheduler);
 795                 }
 796             }
 797         }
 798     }
 799     return TRUE;
 800 }
 801 
 802 /* Call this after all the nodes and resources have been
 803  * unpacked, but before the status section is read.
 804  *
 805  * A remote node's online status is reflected by the state
 806  * of the remote node's connection resource. We need to link
 807  * the remote node to this connection resource so we can have
 808  * easy access to the connection resource during the scheduler calculations.
 809  */
 810 static void
 811 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
 812 {
 813     pcmk_node_t *remote_node = NULL;
 814 
 815     if (new_rsc->is_remote_node == FALSE) {
 816         return;
 817     }
 818 
 819     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
 820         /* remote_nodes and remote_resources are not linked in quick location calculations */
 821         return;
 822     }
 823 
 824     remote_node = pcmk_find_node(scheduler, new_rsc->id);
 825     CRM_CHECK(remote_node != NULL, return);
 826 
 827     pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
 828                     new_rsc->id, pcmk__node_name(remote_node));
 829     remote_node->details->remote_rsc = new_rsc;
 830 
 831     if (new_rsc->container == NULL) {
 832         /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
 833          * the same as is done for cluster nodes.
 834          */
 835         handle_startup_fencing(scheduler, remote_node);
 836 
 837     } else {
 838         /* pe_create_node() marks the new node as "remote" or "cluster"; now
 839          * that we know the node is a guest node, update it correctly.
 840          */
 841         pcmk__insert_dup(remote_node->details->attrs,
 842                          CRM_ATTR_KIND, "container");
 843     }
 844 }
 845 
 846 static void
 847 destroy_tag(gpointer data)
 848 {
 849     pcmk_tag_t *tag = data;
 850 
 851     if (tag) {
 852         free(tag->id);
 853         g_list_free_full(tag->refs, free);
 854         free(tag);
 855     }
 856 }
 857 
 858 /*!
 859  * \internal
 860  * \brief Parse configuration XML for resource information
 861  *
 862  * \param[in]     xml_resources  Top of resource configuration XML
 863  * \param[in,out] scheduler      Scheduler data
 864  *
 865  * \return TRUE
 866  *
 867  * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
 868  *       be used when pe__unpack_resource() calls resource_location()
 869  */
 870 gboolean
 871 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
 872 {
 873     xmlNode *xml_obj = NULL;
 874     GList *gIter = NULL;
 875 
 876     scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
 877 
 878     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
 879          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
 880 
 881         pcmk_resource_t *new_rsc = NULL;
 882         const char *id = pcmk__xe_id(xml_obj);
 883 
 884         if (pcmk__str_empty(id)) {
 885             pcmk__config_err("Ignoring <%s> resource without ID",
 886                              xml_obj->name);
 887             continue;
 888         }
 889 
 890         if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
 891             if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
 892                                              NULL, NULL) == FALSE) {
  893                 /* Record the template's ID so its existence is known, even if nothing references it. */
 894                 pcmk__insert_dup(scheduler->template_rsc_sets, id, NULL);
 895             }
 896             continue;
 897         }
 898 
 899         crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
 900         if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
 901                                 scheduler) == pcmk_rc_ok) {
 902             scheduler->resources = g_list_append(scheduler->resources, new_rsc);
 903             pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
 904 
 905         } else {
 906             pcmk__config_err("Ignoring <%s> resource '%s' "
 907                              "because configuration is invalid",
 908                              xml_obj->name, id);
 909         }
 910     }
 911 
 912     for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
 913         pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
 914 
 915         setup_container(rsc, scheduler);
 916         link_rsc2remotenode(scheduler, rsc);
 917     }
 918 
 919     scheduler->resources = g_list_sort(scheduler->resources,
 920                                       pe__cmp_rsc_priority);
 921     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
 922         /* Ignore */
 923 
 924     } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
 925                && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
 926 
 927         pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
 928         pcmk__config_err("Either configure some or disable STONITH with the "
 929                          PCMK_OPT_STONITH_ENABLED " option");
 930         pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
 931     }
 932 
 933     return TRUE;
 934 }
 935 
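      /* For illustration, pcmk__unpack_fencing_topology() below validates
       * fencing topology entries such as (ID, target, and device names
       * hypothetical):
       *
       *   <fencing-level id="fl-node1-1" target="node1" index="1"
       *                  devices="ipmi1"/>
       *
       * Entries without an ID, or whose index falls outside
       * ST__LEVEL_MIN..ST__LEVEL_MAX, are ignored with a warning.
       */
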
 936 /*!
 937  * \internal
 938  * \brief Parse configuration XML for fencing topology information
 939  *
 940  * \param[in]     xml_fencing_topology  Top of fencing topology configuration XML
 941  * \param[in,out] scheduler             Scheduler data
 942  *
 943  * \return void
 944  */
 945 void
 946 pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
 947 {
 948     xmlNode *xml_obj = NULL;
 949     int id = 0;
 950 
 951     for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL);
 952          xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) {
 953 
 954         crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id);
 955 
 956         // Ensure an ID was given
 957         if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
 958             pcmk__config_warn("Ignoring registration for topology level without ID");
 959             continue;
 960         }
 961 
 962         // Ensure level ID is in allowed range
 963         if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
 964             pcmk__config_warn("Ignoring topology registration with invalid level %d",
 965                                id);
 966             continue;
 967         }
 968 
 969     }
 970 }
 971 
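      /* For illustration, unpack_tags() below processes configuration entries
       * such as (IDs hypothetical):
       *
       *   <tag id="all-vms">
       *     <obj_ref id="vm1"/>
       *     <obj_ref id="vm2"/>
       *   </tag>
       *
       * Each obj_ref ID is added to the tag's reference list via
       * add_tag_ref().
       */
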
 972 gboolean
 973 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
 974 {
 975     xmlNode *xml_tag = NULL;
 976 
 977     scheduler->tags = pcmk__strkey_table(free, destroy_tag);
 978 
 979     for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
 980          xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
 981 
 982         xmlNode *xml_obj_ref = NULL;
 983         const char *tag_id = pcmk__xe_id(xml_tag);
 984 
 985         if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
 986             continue;
 987         }
 988 
 989         if (tag_id == NULL) {
 990             pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
 991                              (const char *) xml_tag->name);
 992             continue;
 993         }
 994 
 995         for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
 996              xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
 997 
 998             const char *obj_ref = pcmk__xe_id(xml_obj_ref);
 999 
1000             if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
1001                 continue;
1002             }
1003 
1004             if (obj_ref == NULL) {
1005                 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
1006                                  xml_obj_ref->name, tag_id);
1007                 continue;
1008             }
1009 
1010             if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
1011                 return FALSE;
1012             }
1013         }
1014     }
1015 
1016     return TRUE;
1017 }
1018 
1019 /* The ticket state section:
1020  * "/cib/status/tickets/ticket_state" */
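      /* For illustration, a ticket state entry looks like (ticket name and
       * timestamp hypothetical):
       *
       *   <ticket_state id="ticketA" granted="true" last-granted="1720000000"/>
       *
       * All attributes other than the ID are copied verbatim into
       * ticket->state before the known ones are interpreted.
       */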
1021 static gboolean
1022 unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
1023 {
1024     const char *ticket_id = NULL;
1025     const char *granted = NULL;
1026     const char *last_granted = NULL;
1027     const char *standby = NULL;
1028     xmlAttrPtr xIter = NULL;
1029 
1030     pcmk_ticket_t *ticket = NULL;
1031 
1032     ticket_id = pcmk__xe_id(xml_ticket);
1033     if (pcmk__str_empty(ticket_id)) {
1034         return FALSE;
1035     }
1036 
1037     crm_trace("Processing ticket state for %s", ticket_id);
1038 
1039     ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
1040     if (ticket == NULL) {
1041         ticket = ticket_new(ticket_id, scheduler);
1042         if (ticket == NULL) {
1043             return FALSE;
1044         }
1045     }
1046 
1047     for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1048         const char *prop_name = (const char *)xIter->name;
1049         const char *prop_value = pcmk__xml_attr_value(xIter);
1050 
1051         if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1052             continue;
1053         }
1054         pcmk__insert_dup(ticket->state, prop_name, prop_value);
1055     }
1056 
1057     granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1058     if (granted && crm_is_true(granted)) {
1059         ticket->granted = TRUE;
1060         crm_info("We have ticket '%s'", ticket->id);
1061     } else {
1062         ticket->granted = FALSE;
1063         crm_info("We do not have ticket '%s'", ticket->id);
1064     }
1065 
1066     last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1067     if (last_granted) {
1068         long long last_granted_ll = 0LL;
1069         int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1070 
1071         if (rc != pcmk_rc_ok) {
1072             crm_warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1073                      " value '%s' in state for ticket %s: %s",
1074                      last_granted_ll, last_granted, ticket->id,
1075                      pcmk_rc_str(rc));
1076         }
1077         ticket->last_granted = (time_t) last_granted_ll;
1078     }
1079 
1080     standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1081     if (standby && crm_is_true(standby)) {
1082         ticket->standby = TRUE;
1083         if (ticket->granted) {
1084             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1085         }
1086     } else {
1087         ticket->standby = FALSE;
1088     }
1089 
1090     crm_trace("Done with ticket state for %s", ticket_id);
1091 
1092     return TRUE;
1093 }
1094 
1095 static gboolean
1096 unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1097 {
1098     xmlNode *xml_obj = NULL;
1099 
1100     for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
1101          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1102 
1103         if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
1104             continue;
1105         }
1106         unpack_ticket_state(xml_obj, scheduler);
1107     }
1108 
1109     return TRUE;
1110 }
1111 
1112 static void
1113 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1114                            pcmk_scheduler_t *scheduler)
1115 {
1116     const char *discovery = NULL;
1117     const xmlNode *attrs = NULL;
1118     pcmk_resource_t *rsc = NULL;
1119 
1120     if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1121         return;
1122     }
1123 
1124     if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1125         return;
1126     }
1127     crm_trace("Processing Pacemaker Remote node %s",
1128               pcmk__node_name(this_node));
1129 
1130     pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
1131                        &(this_node->details->remote_maintenance), 0);
1132 
1133     rsc = this_node->details->remote_rsc;
1134     if (this_node->details->remote_requires_reset == FALSE) {
1135         this_node->details->unclean = FALSE;
1136         this_node->details->unseen = FALSE;
1137     }
1138     attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1139                                  NULL);
1140     add_node_attrs(attrs, this_node, TRUE, scheduler);
1141 
1142     if (pe__shutdown_requested(this_node)) {
1143         crm_info("%s is shutting down", pcmk__node_name(this_node));
1144         this_node->details->shutdown = TRUE;
1145     }
1146 
1147     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1148                                     pcmk__rsc_node_current))) {
1149         crm_info("%s is in standby mode", pcmk__node_name(this_node));
1150         this_node->details->standby = TRUE;
1151     }
1152 
1153     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1154                                     pcmk__rsc_node_current))
1155         || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1156         crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1157         this_node->details->maintenance = TRUE;
1158     }
1159 
1160     discovery = pcmk__node_attr(this_node,
1161                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1162                                 NULL, pcmk__rsc_node_current);
1163     if ((discovery != NULL) && !crm_is_true(discovery)) {
1164         pcmk__warn_once(pcmk__wo_rdisc_enabled,
1165                         "Support for the "
1166                         PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1167                         " node attribute is deprecated and will be removed"
1168                         " (and behave as 'true') in a future release.");
1169 
1170         if (pcmk__is_remote_node(this_node)
1171             && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1172             pcmk__config_warn("Ignoring "
1173                               PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1174                               " attribute on Pacemaker Remote node %s"
1175                               " because fencing is disabled",
1176                               pcmk__node_name(this_node));
1177         } else {
1178             /* This is either a remote node with fencing enabled, or a guest
1179              * node. We don't care whether fencing is enabled when fencing guest
1180              * nodes, because they are "fenced" by recovering their containing
1181              * resource.
1182              */
1183             crm_info("%s has resource discovery disabled",
1184                      pcmk__node_name(this_node));
1185             this_node->details->rsc_discovery_enabled = FALSE;
1186         }
1187     }
1188 }
1189 
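      /* For illustration, the transient attributes unpacked below arrive in
       * the status section as (node ID and values hypothetical):
       *
       *   <transient_attributes id="1">
       *     <instance_attributes id="status-1">
       *       <nvpair id="status-1-standby" name="standby" value="true"/>
       *     </instance_attributes>
       *   </transient_attributes>
       */
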
1190 /*!
1191  * \internal
1192  * \brief Unpack a cluster node's transient attributes
1193  *
1194  * \param[in]     state      CIB node state XML
1195  * \param[in,out] node       Cluster node whose attributes are being unpacked
1196  * \param[in,out] scheduler  Scheduler data
1197  */
1198 static void
1199 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1200                             pcmk_scheduler_t *scheduler)
1201 {
1202     const char *discovery = NULL;
1203     const xmlNode *attrs = pcmk__xe_first_child(state,
1204                                                 PCMK__XE_TRANSIENT_ATTRIBUTES,
1205                                                 NULL, NULL);
1206 
1207     add_node_attrs(attrs, node, TRUE, scheduler);
1208 
1209     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1210                                     pcmk__rsc_node_current))) {
1211         crm_info("%s is in standby mode", pcmk__node_name(node));
1212         node->details->standby = TRUE;
1213     }
1214 
1215     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1216                                     pcmk__rsc_node_current))) {
1217         crm_info("%s is in maintenance mode", pcmk__node_name(node));
1218         node->details->maintenance = TRUE;
1219     }
1220 
1221     discovery = pcmk__node_attr(node,
1222                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1223                                 NULL, pcmk__rsc_node_current);
1224     if ((discovery != NULL) && !crm_is_true(discovery)) {
1225         pcmk__config_warn("Ignoring "
1226                           PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1227                           " attribute for %s because disabling resource"
1228                           " discovery is not allowed for cluster nodes",
1229                           pcmk__node_name(node));
1230     }
1231 }
1232 
1233 /*!
1234  * \internal
1235  * \brief Unpack a node state entry (first pass)
1236  *
1237  * Unpack one node state entry from status. This unpacks information from the
 1238  * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
1239  * the resource history inside it. Multiple passes through the status are needed
1240  * to fully unpack everything.
1241  *
1242  * \param[in]     state      CIB node state XML
1243  * \param[in,out] scheduler  Scheduler data
1244  */
1245 static void
1246 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1247 {
1248     const char *id = NULL;
1249     const char *uname = NULL;
1250     pcmk_node_t *this_node = NULL;
1251 
1252     id = crm_element_value(state, PCMK_XA_ID);
1253     if (id == NULL) {
1254         pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1255                          PCMK_XA_ID);
1256         crm_log_xml_info(state, "missing-id");
1257         return;
1258     }
1259 
1260     uname = crm_element_value(state, PCMK_XA_UNAME);
1261     if (uname == NULL) {
1262         /* If a joining peer causes the cluster to acquire quorum from
1263          * Corosync before it has joined the CPG membership of
1264          * pacemaker-controld, it's possible that the created
1265          * PCMK__XE_NODE_STATE entry doesn't have a PCMK_XA_UNAME yet. We
1266          * should recognize the node as pending and wait for it to join CPG.
1267          */
1268         crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1269                   "without " PCMK_XA_UNAME,
1270                   id);
1271     }
1272 
1273     this_node = pe_find_node_any(scheduler->nodes, id, uname);
1274     if (this_node == NULL) {
1275         crm_notice("Ignoring recorded state for removed node with name %s and "
1276                    PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1277         return;
1278     }
1279 
1280     if (pcmk__is_pacemaker_remote_node(this_node)) {
1281         /* We can't determine the online status of Pacemaker Remote nodes until
1282          * after all resource history has been unpacked. In this first pass, we
1283          * do need to mark whether the node has been fenced, as this plays a
1284          * role during unpacking cluster node resource state.
1285          */
1286         pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
1287                            &(this_node->details->remote_was_fenced), 0);
1288         return;
1289     }
1290 
1291     unpack_transient_attributes(state, this_node, scheduler);
1292 
1293     /* Provisionally mark this cluster node as clean. We have at least seen it
1294      * in the current cluster's lifetime.
1295      */
1296     this_node->details->unclean = FALSE;
1297     this_node->details->unseen = FALSE;
1298 
1299     crm_trace("Determining online status of cluster node %s (id %s)",
1300               pcmk__node_name(this_node), id);
1301     determine_online_status(state, this_node, scheduler);
1302 
1303     if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
1304         && this_node->details->online
1305         && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1306         /* Everything else should flow from this automatically
1307          * (at least until the scheduler becomes able to migrate off
1308          * healthy resources)
1309          */
1310         pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1311                       FALSE);
1312     }
1313 }
1314 
1315 /*!
1316  * \internal
1317  * \brief Unpack nodes' resource history as much as possible
1318  *
1319  * Unpack as many nodes' resource history as possible in one pass through the
1320  * status. We need to process Pacemaker Remote nodes' connections/containers
1321  * before unpacking their history; the connection/container history will be
1322  * in another node's history, so it might take multiple passes to unpack
1323  * everything.
1324  *
1325  * \param[in]     status     CIB XML status section
1326  * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
1327  * \param[in,out] scheduler  Scheduler data
1328  *
1329  * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
1330  *         or EAGAIN if more unpacking remains to be done)
1331  */
1332 static int
1333 unpack_node_history(const xmlNode *status, bool fence,
1334                     pcmk_scheduler_t *scheduler)
1335 {
1336     int rc = pcmk_rc_ok;
1337 
1338     // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1339     for (const xmlNode *state = pcmk__xe_first_child(status,
1340                                                      PCMK__XE_NODE_STATE, NULL,
1341                                                      NULL);
1342          state != NULL; state = pcmk__xe_next_same(state)) {
1343 
1344         const char *id = pcmk__xe_id(state);
1345         const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1346         pcmk_node_t *this_node = NULL;
1347 
1348         if ((id == NULL) || (uname == NULL)) {
1349             // Warning already logged in first pass through status section
1350             crm_trace("Not unpacking resource history from malformed "
1351                       PCMK__XE_NODE_STATE " without id and/or uname");
1352             continue;
1353         }
1354 
1355         this_node = pe_find_node_any(scheduler->nodes, id, uname);
1356         if (this_node == NULL) {
1357             // Warning already logged in first pass through status section
1358             crm_trace("Not unpacking resource history for node %s because "
1359                       "no longer in configuration", id);
1360             continue;
1361         }
1362 
1363         if (this_node->details->unpacked) {
1364             crm_trace("Not unpacking resource history for node %s because "
1365                       "already unpacked", id);
1366             continue;
1367         }
1368 
1369         if (fence) {
1370             // We're processing all remaining nodes
1371 
1372         } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1373             /* We can unpack a guest node's history only after we've unpacked
1374              * other resource history to the point that we know that the node's
1375              * connection and containing resource are both up.
1376              */
1377             pcmk_resource_t *rsc = this_node->details->remote_rsc;
1378 
1379             if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1380                 || (rsc->container->role != pcmk_role_started)) {
1381                 crm_trace("Not unpacking resource history for guest node %s "
1382                           "because container and connection are not known to "
1383                           "be up", id);
1384                 continue;
1385             }
1386 
1387         } else if (pcmk__is_remote_node(this_node)) {
1388             /* We can unpack a remote node's history only after we've unpacked
1389              * other resource history to the point that we know that the node's
1390              * connection is up, with the exception of when shutdown locks are
1391              * in use.
1392              */
1393             pcmk_resource_t *rsc = this_node->details->remote_rsc;
1394 
1395             if ((rsc == NULL)
1396                 || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
1397                     && (rsc->role != pcmk_role_started))) {
1398                 crm_trace("Not unpacking resource history for remote node %s "
1399                           "because connection is not known to be up", id);
1400                 continue;
1401             }
1402 
1403         /* If fencing and shutdown locks are disabled and we're not processing
1404          * unseen nodes, then we don't want to unpack offline nodes until online
1405          * nodes have been unpacked. This allows us to number active clone
1406          * instances first.
1407          */
1408         } else if (!pcmk_any_flags_set(scheduler->flags,
1409                                        pcmk_sched_fencing_enabled
1410                                        |pcmk_sched_shutdown_lock)
1411                    && !this_node->details->online) {
1412             crm_trace("Not unpacking resource history for offline "
1413                       "cluster node %s", id);
1414             continue;
1415         }
1416 
1417         if (pcmk__is_pacemaker_remote_node(this_node)) {
1418             determine_remote_online_status(scheduler, this_node);
1419             unpack_handle_remote_attrs(this_node, state, scheduler);
1420         }
1421 
1422         crm_trace("Unpacking resource history for %snode %s",
1423                   (fence? "unseen " : ""), id);
1424 
1425         this_node->details->unpacked = TRUE;
1426         unpack_node_lrm(this_node, state, scheduler);
1427 
1428         rc = EAGAIN; // Other node histories might depend on this one
1429     }
1430     return rc;
1431 }
1432 
1433 /* remove nodes that are down, stopping */
1434 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1435 /* anything else? */
1436 gboolean
1437 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1438 {
1439     xmlNode *state = NULL;
1440 
1441     crm_trace("Beginning unpack");
1442 
1443     if (scheduler->tickets == NULL) {
1444         scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
1445     }
1446 
1447     for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1448          state = pcmk__xe_next(state)) {
1449 
1450         if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1451             unpack_tickets_state((xmlNode *) state, scheduler);
1452 
1453         } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1454             unpack_node_state(state, scheduler);
1455         }
1456     }
1457 
1458     while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1459         crm_trace("Another pass through node resource histories is needed");
1460     }
1461 
1462     // Now catch any nodes we didn't see
1463     unpack_node_history(status,
1464                         pcmk_is_set(scheduler->flags,
1465                                     pcmk_sched_fencing_enabled),
1466                         scheduler);
1467 
1468     /* Now that we know where resources are, we can schedule stops of containers
1469      * with failed bundle connections
1470      */
1471     if (scheduler->stop_needed != NULL) {
1472         for (GList *item = scheduler->stop_needed; item; item = item->next) {
1473             pcmk_resource_t *container = item->data;
1474             pcmk_node_t *node = pcmk__current_node(container);
1475 
1476             if (node) {
1477                 stop_action(container, node, FALSE);
1478             }
1479         }
1480         g_list_free(scheduler->stop_needed);
1481         scheduler->stop_needed = NULL;
1482     }
1483 
1484     /* Now that we know status of all Pacemaker Remote connections and nodes,
1485      * we can stop connections for node shutdowns, and check the online status
1486      * of remote/guest nodes that didn't have any node history to unpack.
1487      */
1488     for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1489         pcmk_node_t *this_node = gIter->data;
1490 
1491         if (!pcmk__is_pacemaker_remote_node(this_node)) {
1492             continue;
1493         }
1494         if (this_node->details->shutdown
1495             && (this_node->details->remote_rsc != NULL)) {
1496             pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
1497                               "remote shutdown");
1498         }
1499         if (!this_node->details->unpacked) {
1500             determine_remote_online_status(scheduler, this_node);
1501         }
1502     }
1503 
1504     return TRUE;
1505 }
1506 
1507 /*!
1508  * \internal
1509  * \brief Unpack node's time when it became a member at the cluster layer
1510  *
1511  * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
1512  * \param[in,out] scheduler   Scheduler data
1513  *
1514  * \return Epoch time when node became a cluster member
1515  *         (or scheduler effective time for legacy entries) if a member,
1516  *         0 if not a member, or -1 if no valid information available
1517  */
1518 static long long
1519 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1520 {
1521     const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1522     int member = 0;
1523 
1524     if (member_time == NULL) {
1525         return -1LL;
1526 
1527     } else if (crm_str_to_boolean(member_time, &member) == 1) {
1528         /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1529          * recorded as a boolean for a DC < 2.1.7, or the node is pending
1530          * shutdown and has left the CPG, in which case it was set to 1 to avoid
1531          * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1532          *
1533          * We return the effective time for in_ccm=1 because what's important to
1534          * avoid fencing is that effective time minus this value is less than
1535          * the pending node timeout.
1536          */
1537         return member? (long long) get_effective_time(scheduler) : 0LL;
1538 
1539     } else {
1540         long long when_member = 0LL;
1541 
1542         if ((pcmk__scan_ll(member_time, &when_member,
1543                            0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1544             crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1545                      " in " PCMK__XE_NODE_STATE " entry", member_time);
1546             return -1LL;
1547         }
1548         return when_member;
1549     }
1550 }
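
     /* Illustrative examples (hypothetical values, assuming
      * get_effective_time() returns 1700000000):
      *
      *   PCMK__XA_IN_CCM absent        -> -1          (no valid information)
      *   PCMK__XA_IN_CCM="0"           ->  0          (not a member)
      *   PCMK__XA_IN_CCM="true"        ->  1700000000 (legacy boolean entry)
      *   PCMK__XA_IN_CCM="1699999000"  ->  1699999000 (when node became member)
      *   PCMK__XA_IN_CCM="garbage"     -> -1          (warning logged)
      */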
1551 
1552 /*!
1553  * \internal
1554  * \brief Unpack node's time when it became online in process group
1555  *
1556  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry
1557  *
1558  * \return Epoch time when node became online in process group (or 0 if not
1559  *         online, or 1 for legacy online entries)
1560  */
1561 static long long
1562 unpack_node_online(const xmlNode *node_state)
1563 {
1564     const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1565 
1566     // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1567     if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1568                      pcmk__str_casei|pcmk__str_null_matches)) {
1569         return 0LL;
1570 
1571     } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1572         return 1LL;
1573 
1574     } else {
1575         long long when_online = 0LL;
1576 
1577         if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1578             || (when_online < 0)) {
1579             crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1580                      PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1581             return 0LL;
1582         }
1583         return when_online;
1584     }
1585 }
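
     /* Illustrative examples (hypothetical values):
      *
      *   PCMK_XA_CRMD absent        -> 0           (offline)
      *   PCMK_XA_CRMD="offline"     -> 0           (legacy entry)
      *   PCMK_XA_CRMD="online"      -> 1           (legacy entry)
      *   PCMK_XA_CRMD="1700000042"  -> 1700000042  (when node joined group)
      *   PCMK_XA_CRMD="garbage"     -> 0           (warning logged)
      */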
1586 
1587 /*!
1588  * \internal
1589  * \brief Unpack node attribute for user-requested fencing
1590  *
1591  * \param[in] node        Node to check
1592  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry in CIB status
1593  *
1594  * \return \c true if fencing has been requested for \p node, otherwise \c false
1595  */
1596 static bool
1597 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1598 {
1599     long long value = 0LL;
1600     int value_i = 0;
1601     int rc = pcmk_rc_ok;
1602     const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1603                                           NULL, pcmk__rsc_node_current);
1604 
1605     // Value may be boolean or an epoch time
1606     if (crm_str_to_boolean(value_s, &value_i) == 1) {
1607         return (value_i != 0);
1608     }
1609     rc = pcmk__scan_ll(value_s, &value, 0LL);
1610     if (rc == pcmk_rc_ok) {
1611         return (value > 0);
1612     }
1613     crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1614              " node attribute for %s: %s",
1615              value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1616     return false;
1617 }
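
     /* Illustrative examples (hypothetical attribute values):
      *
      *   terminate="true"        -> true   (fencing requested)
      *   terminate="false"       -> false
      *   terminate="1700000099"  -> true   (any positive epoch time)
      *   terminate="garbage"     -> false  (warning logged)
      */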
1618 
1619 static gboolean
1620 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1621                                    const xmlNode *node_state,
1622                                    pcmk_node_t *this_node)
1623 {
1624     gboolean online = FALSE;
1625     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1626     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1627     long long when_member = unpack_node_member(node_state, scheduler);
1628     long long when_online = unpack_node_online(node_state);
1629 
1630     if (when_member <= 0) {
1631         crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1632                   ((when_member < 0)? "presumed " : ""));
1633 
1634     } else if (when_online > 0) {
1635         if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1636             online = TRUE;
1637         } else {
1638             crm_debug("Node %s is not ready to run resources: %s",
1639                       pcmk__node_name(this_node), join);
1640         }
1641 
1642     } else if (this_node->details->expected_up == FALSE) {
1643         crm_trace("Node %s controller is down: "
1644                   "member@%lld online@%lld join=%s expected=%s",
1645                   pcmk__node_name(this_node), when_member, when_online,
1646                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1647 
1648     } else {
1649         /* mark it unclean */
1650         pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1651         crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1652                  pcmk__node_name(this_node), when_member, when_online,
1653                  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1654     }
1655     return online;
1656 }
1657 
1658 /*!
1659  * \internal
1660  * \brief Check whether a node has taken too long to join controller group
1661  *
1662  * \param[in,out] scheduler    Scheduler data
1663  * \param[in]     node         Node to check
1664  * \param[in]     when_member  Epoch time when node became a cluster member
1665  * \param[in]     when_online  Epoch time when node joined controller group
1666  *
1667  * \return true if node has been pending (on the way up) longer than
1668  *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
1669  * \note This will also update the cluster's recheck time if appropriate.
1670  */
1671 static inline bool
1672 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1673                  long long when_member, long long when_online)
1674 {
1675     if ((scheduler->node_pending_timeout > 0)
1676         && (when_member > 0) && (when_online <= 0)) {
1677         // There is a timeout on pending nodes, and node is pending
1678 
1679         time_t timeout = when_member + scheduler->node_pending_timeout;
1680 
1681         if (get_effective_time(node->details->data_set) >= timeout) {
1682             return true; // Node has timed out
1683         }
1684 
1685         // Node is pending, but still has time
1686         pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1687     }
1688     return false;
1689 }
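
     /* Worked example (hypothetical values): with a pending-node timeout of
      * 120 seconds, when_member=1000, and when_online=0, the node is pending
      * and its deadline is 1000 + 120 = 1120. If the effective time is 1130,
      * this returns true (the node has timed out); if it is 1100, this returns
      * false and schedules a cluster recheck for time 1120.
      */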
1690 
1691 static bool
1692 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1693                                 const xmlNode *node_state,
1694                                 pcmk_node_t *this_node)
1695 {
1696     bool termination_requested = unpack_node_terminate(this_node, node_state);
1697     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1698     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1699     long long when_member = unpack_node_member(node_state, scheduler);
1700     long long when_online = unpack_node_online(node_state);
1701 
1702 /*
1703   - PCMK__XA_JOIN          ::= member|down|pending|banned
1704   - PCMK_XA_EXPECTED       ::= member|down
1705 
1706   @COMPAT with entries recorded for DCs < 2.1.7
1707   - PCMK__XA_IN_CCM        ::= true|false
1708   - PCMK_XA_CRMD           ::= online|offline
1709 
1710   Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1711   - PCMK__XA_IN_CCM        ::= <timestamp>|0
1712   Since when the node has been a cluster member. A value of 0 means the node is
1713   not a cluster member.
1714 
1715   - PCMK_XA_CRMD           ::= <timestamp>|0
1716   Since when the peer has been online in CPG. A value of 0 means the peer is
1717   offline in CPG.
1718 */
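
     /*
       Illustrative example (hypothetical values) of a PCMK__XE_NODE_STATE entry
       recorded in the newer format:

         <node_state id="1" uname="node1" in_ccm="1700000000" crmd="1700000005"
                     join="member" expected="member"/>

       With these values, the node became a cluster member, then came online in
       the controller process group, and has joined as a full member, so the
       checks below would consider it online.
     */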
1719 
1720     crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1721               pcmk__node_name(this_node), when_member, when_online,
1722               pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1723               (termination_requested? " (termination requested)" : ""));
1724 
1725     if (this_node->details->shutdown) {
1726         crm_debug("%s is shutting down", pcmk__node_name(this_node));
1727 
1728         /* Slightly different criteria since we can't shut down a dead peer */
1729         return (when_online > 0);
1730     }
1731 
1732     if (when_member < 0) {
1733         pe_fence_node(scheduler, this_node,
1734                       "peer has not been seen by the cluster", FALSE);
1735         return false;
1736     }
1737 
1738     if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1739         pe_fence_node(scheduler, this_node,
1740                       "peer failed Pacemaker membership criteria", FALSE);
1741 
1742     } else if (termination_requested) {
1743         if ((when_member <= 0) && (when_online <= 0)
1744             && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1745             crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1746             return false;
1747         }
1748         pe_fence_node(scheduler, this_node, "fencing was requested", false);
1749 
1750     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1751                             pcmk__str_null_matches)) {
1752 
1753         if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1754             pe_fence_node(scheduler, this_node,
1755                           "peer pending timed out on joining the process group",
1756                           FALSE);
1757 
1758         } else if ((when_member > 0) || (when_online > 0)) {
1759             crm_info("- %s is not ready to run resources",
1760                      pcmk__node_name(this_node));
1761             this_node->details->standby = TRUE;
1762             this_node->details->pending = TRUE;
1763 
1764         } else {
1765             crm_trace("%s is down or still coming up",
1766                       pcmk__node_name(this_node));
1767         }
1768 
1769     } else if (when_member <= 0) {
1770         // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1771         pe_fence_node(scheduler, this_node,
1772                       "peer is no longer part of the cluster", TRUE);
1773 
1774     } else if (when_online <= 0) {
1775         pe_fence_node(scheduler, this_node,
1776                       "peer process is no longer available", FALSE);
1777 
1778         /* Everything is running at this point, now check join state */
1779 
1780     } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1781         crm_info("%s is active", pcmk__node_name(this_node));
1782 
1783     } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1784                                 CRMD_JOINSTATE_DOWN, NULL)) {
1785         crm_info("%s is not ready to run resources",
1786                  pcmk__node_name(this_node));
1787         this_node->details->standby = TRUE;
1788         this_node->details->pending = TRUE;
1789 
1790     } else {
1791         pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1792                       FALSE);
1793     }
1794 
1795     return (when_member > 0);
1796 }
1797 
1798 static void
1799 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1800                                pcmk_node_t *this_node)
1801 {
1802     pcmk_resource_t *rsc = this_node->details->remote_rsc;
1803     pcmk_resource_t *container = NULL;
1804     pcmk_node_t *host = NULL;
1805 
1806     /* If there is a node state entry for a (former) Pacemaker Remote node
1807      * but no resource creating that node, the node's connection resource will
1808      * be NULL. Consider it an offline remote node in that case.
1809      */
1810     if (rsc == NULL) {
1811         this_node->details->online = FALSE;
1812         goto remote_online_done;
1813     }
1814 
1815     container = rsc->container;
1816 
1817     if (container && pcmk__list_of_1(rsc->running_on)) {
1818         host = rsc->running_on->data;
1819     }
1820 
1821     /* If the resource is currently started, mark it online. */
1822     if (rsc->role == pcmk_role_started) {
1823         crm_trace("%s node %s presumed ONLINE because connection resource is started",
1824                   (container? "Guest" : "Remote"), this_node->details->id);
1825         this_node->details->online = TRUE;
1826     }
1827 
1828     /* consider this node shutting down if transitioning start->stop */
1829     if ((rsc->role == pcmk_role_started)
1830         && (rsc->next_role == pcmk_role_stopped)) {
1831 
1832         crm_trace("%s node %s shutting down because connection resource is stopping",
1833                   (container? "Guest" : "Remote"), this_node->details->id);
1834         this_node->details->shutdown = TRUE;
1835     }
1836 
1837     /* Now check all the failure conditions. */
1838     if (container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1839         crm_trace("Guest node %s UNCLEAN because guest resource failed",
1840                   this_node->details->id);
1841         this_node->details->online = FALSE;
1842         this_node->details->remote_requires_reset = TRUE;
1843 
1844     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1845         crm_trace("%s node %s OFFLINE because connection resource failed",
1846                   (container? "Guest" : "Remote"), this_node->details->id);
1847         this_node->details->online = FALSE;
1848 
1849     } else if ((rsc->role == pcmk_role_stopped)
1850                || ((container != NULL)
1851                    && (container->role == pcmk_role_stopped))) {
1852 
1853         crm_trace("%s node %s OFFLINE because its resource is stopped",
1854                   (container? "Guest" : "Remote"), this_node->details->id);
1855         this_node->details->online = FALSE;
1856         this_node->details->remote_requires_reset = FALSE;
1857 
1858     } else if (host && (host->details->online == FALSE)
1859                && host->details->unclean) {
1860         crm_trace("Guest node %s UNCLEAN because host is unclean",
1861                   this_node->details->id);
1862         this_node->details->online = FALSE;
1863         this_node->details->remote_requires_reset = TRUE;
1864     }
1865 
1866 remote_online_done:
1867     crm_trace("Remote node %s online=%s",
1868               this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1869 }
1870 
1871 static void
1872 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1873                         pcmk_scheduler_t *scheduler)
1874 {
1875     gboolean online = FALSE;
1876     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1877 
1878     CRM_CHECK(this_node != NULL, return);
1879 
1880     this_node->details->shutdown = FALSE;
1881     this_node->details->expected_up = FALSE;
1882 
1883     if (pe__shutdown_requested(this_node)) {
1884         this_node->details->shutdown = TRUE;
1885 
1886     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1887         this_node->details->expected_up = TRUE;
1888     }
1889 
1890     if (this_node->details->type == node_ping) {
1891         this_node->details->unclean = FALSE;
1892         online = FALSE;         /* As far as resource management is concerned,
1893                                  * the node is safely offline.
1894                                  * Anyone caught abusing this logic will be shot
1895                                  */
1896 
1897     } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1898         online = determine_online_status_no_fencing(scheduler, node_state,
1899                                                     this_node);
1900 
1901     } else {
1902         online = determine_online_status_fencing(scheduler, node_state,
1903                                                  this_node);
1904     }
1905 
1906     if (online) {
1907         this_node->details->online = TRUE;
1908 
1909     } else {
1910         /* remove node from contention */
1911         this_node->fixed = TRUE; // @COMPAT deprecated and unused
1912         this_node->weight = -PCMK_SCORE_INFINITY;
1913     }
1914 
1915     if (online && this_node->details->shutdown) {
1916         /* don't run resources here */
1917         this_node->fixed = TRUE; // @COMPAT deprecated and unused
1918         this_node->weight = -PCMK_SCORE_INFINITY;
1919     }
1920 
1921     if (this_node->details->type == node_ping) {
1922         crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
1923 
1924     } else if (this_node->details->unclean) {
1925         pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
1926 
1927     } else if (this_node->details->online) {
1928         crm_info("%s is %s", pcmk__node_name(this_node),
1929                  this_node->details->shutdown ? "shutting down" :
1930                  this_node->details->pending ? "pending" :
1931                  this_node->details->standby ? "standby" :
1932                  this_node->details->maintenance ? "maintenance" : "online");
1933 
1934     } else {
1935         crm_trace("%s is offline", pcmk__node_name(this_node));
1936     }
1937 }
1938 
1939 /*!
1940  * \internal
1941  * \brief Find the end of a resource's name, excluding any clone suffix
1942  *
1943  * \param[in] id  Resource ID to check
1944  *
1945  * \return Pointer to last character of resource's base name
1946  */
1947 const char *
1948 pe_base_name_end(const char *id)
1949 {
1950     if (!pcmk__str_empty(id)) {
1951         const char *end = id + strlen(id) - 1;
1952 
1953         for (const char *s = end; s > id; --s) {
1954             switch (*s) {
1955                 case '0':
1956                 case '1':
1957                 case '2':
1958                 case '3':
1959                 case '4':
1960                 case '5':
1961                 case '6':
1962                 case '7':
1963                 case '8':
1964                 case '9':
1965                     break;
1966                 case ':':
1967                     return (s == end)? s : (s - 1);
1968                 default:
1969                     return end;
1970             }
1971         }
1972         return end;
1973     }
1974     return NULL;
1975 }
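
     /* Illustrative examples (hypothetical IDs):
      *
      *   pe_base_name_end("myrsc:1")  -> pointer to the final 'c' in "myrsc"
      *   pe_base_name_end("myrsc")    -> pointer to the final 'c' (no suffix)
      *   pe_base_name_end("rsc1:0")   -> pointer to the '1' in "rsc1"
      *   pe_base_name_end("")         -> NULL
      */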
1976 
1977 /*!
1978  * \internal
1979  * \brief Get a resource name excluding any clone suffix
1980  *
1981  * \param[in] last_rsc_id  Resource ID to check
1982  *
1983  * \return Pointer to newly allocated string with resource's base name
1984  * \note It is the caller's responsibility to free() the result.
1985  *       This asserts on error, so callers can assume result is not NULL.
1986  */
1987 char *
1988 clone_strip(const char *last_rsc_id)
1989 {
1990     const char *end = pe_base_name_end(last_rsc_id);
1991     char *basename = NULL;
1992 
1993     pcmk__assert(end != NULL);
1994     basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1995     pcmk__assert(basename != NULL);
1996     return basename;
1997 }
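
     /* Illustrative examples (hypothetical IDs):
      *
      *   clone_strip("galera:2")  -> "galera"
      *   clone_strip("galera")    -> "galera"  (no clone suffix to strip)
      */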
1998 
1999 /*!
2000  * \internal
2001  * \brief Get the name of the first instance of a cloned resource
2002  *
2003  * \param[in] last_rsc_id  Resource ID to check
2004  *
2005  * \return Pointer to newly allocated string with resource's base name plus :0
2006  * \note It is the caller's responsibility to free() the result.
2007  *       This asserts on error, so callers can assume result is not NULL.
2008  */
2009 char *
2010 clone_zero(const char *last_rsc_id)
2011 {
2012     const char *end = pe_base_name_end(last_rsc_id);
2013     size_t base_name_len = end - last_rsc_id + 1;
2014     char *zero = NULL;
2015 
2016     pcmk__assert(end != NULL);
2017     zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
2018     memcpy(zero, last_rsc_id, base_name_len);
2019     zero[base_name_len] = ':';
2020     zero[base_name_len + 1] = '0';
2021     return zero;
2022 }
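
     /* Illustrative examples (hypothetical IDs):
      *
      *   clone_zero("galera")    -> "galera:0"
      *   clone_zero("galera:3")  -> "galera:0"
      */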
2023 
2024 static pcmk_resource_t *
2025 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
2026                      pcmk_scheduler_t *scheduler)
2027 {
2028     pcmk_resource_t *rsc = NULL;
2029     xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
2030 
2031     pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
2032     crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
2033     crm_log_xml_debug(xml_rsc, "Orphan resource");
2034 
2035     if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2036         return NULL;
2037     }
2038 
2039     if (xml_contains_remote_node(xml_rsc)) {
2040         pcmk_node_t *node;
2041 
2042         crm_debug("Detected orphaned remote node %s", rsc_id);
2043         node = pcmk_find_node(scheduler, rsc_id);
2044         if (node == NULL) {
2045             node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2046                                   scheduler);
2047         }
2048         link_rsc2remotenode(scheduler, rsc);
2049 
2050         if (node) {
2051             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2052             node->details->shutdown = TRUE;
2053         }
2054     }
2055 
2056     if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2057         /* This orphaned rsc needs to be mapped to a container. */
2058         crm_trace("Detected orphaned container filler %s", rsc_id);
2059         pcmk__set_rsc_flags(rsc, pcmk_rsc_removed_filler);
2060     }
2061     pcmk__set_rsc_flags(rsc, pcmk_rsc_removed);
2062     scheduler->resources = g_list_append(scheduler->resources, rsc);
2063     return rsc;
2064 }
2065 
2066 /*!
2067  * \internal
2068  * \brief Create orphan instance for anonymous clone resource history
2069  *
2070  * \param[in,out] parent     Clone resource that orphan will be added to
2071  * \param[in]     rsc_id     Orphan's resource ID
2072  * \param[in]     node       Where orphan is active (for logging only)
2073  * \param[in,out] scheduler  Scheduler data
2074  *
2075  * \return Newly added orphaned instance of \p parent
2076  */
2077 static pcmk_resource_t *
2078 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2079                         const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2080 {
2081     pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2082 
2083     // find_rsc() because we might be a cloned group
2084     pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
2085                                                pcmk_rsc_match_clone_only);
2086 
2087     pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2088                     top->id, parent->id, rsc_id, pcmk__node_name(node));
2089     return orphan;
2090 }
2091 
2092 /*!
2093  * \internal
2094  * \brief Check a node for an instance of an anonymous clone
2095  *
2096  * Return a child instance of the specified anonymous clone, in order of
2097  * preference: (1) the instance running on the specified node, if any;
2098  * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
2099  * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX
2100  * instances are already active).
2101  *
2102  * \param[in,out] scheduler  Scheduler data
2103  * \param[in]     node       Node on which to check for instance
2104  * \param[in,out] parent     Clone to check
2105  * \param[in]     rsc_id     Name of cloned resource in history (no instance)
2106  */
2107 static pcmk_resource_t *
2108 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2109                      pcmk_resource_t *parent, const char *rsc_id)
2110 {
2111     GList *rIter = NULL;
2112     pcmk_resource_t *rsc = NULL;
2113     pcmk_resource_t *inactive_instance = NULL;
2114     gboolean skip_inactive = FALSE;
2115 
2116     pcmk__assert(pcmk__is_anonymous_clone(parent));
2117 
2118     // Check for active (or partially active, for cloned groups) instance
2119     pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2120                     rsc_id, pcmk__node_name(node), parent->id);
2121     for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2122         GList *locations = NULL;
2123         pcmk_resource_t *child = rIter->data;
2124 
2125         /* Check whether this instance is already known to be active or pending
2126          * anywhere, at this stage of unpacking. Because this function is called
2127          * for a resource before the resource's individual operation history
2128          * entries are unpacked, locations will generally not contain the
2129          * desired node.
2130          *
2131          * However, there are three exceptions:
2132          * (1) when child is a cloned group and we have already unpacked the
2133          *     history of another member of the group on the same node;
2134          * (2) when we've already unpacked the history of another numbered
2135          *     instance on the same node (which can happen if
2136          *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2137          * (3) when we re-run calculations on the same scheduler data as part of
2138          *     a simulation.
2139          */
2140         child->fns->location(child, &locations, 2);
2141         if (locations) {
2142             /* We should never associate the same numbered anonymous clone
2143              * instance with multiple nodes, and clone instances can't migrate,
2144              * so there must be only one location, regardless of history.
2145              */
2146             CRM_LOG_ASSERT(locations->next == NULL);
2147 
2148             if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2149                 /* This child instance is active on the requested node, so check
2150                  * for a corresponding configured resource. We use find_rsc()
2151                  * instead of child because child may be a cloned group, and we
2152                  * need the particular member corresponding to rsc_id.
2153                  *
2154                  * If the history entry is orphaned, rsc will be NULL.
2155                  */
2156                 rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2157                                             pcmk_rsc_match_clone_only);
2158                 if (rsc) {
2159                     /* If there are multiple instance history entries for an
2160                      * anonymous clone in a single node's history (which can
2161                      * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2162                      * to false), we want to consider the instances beyond the
2163                      * first as orphans, even if there are inactive instance
2164                      * numbers available.
2165                      */
2166                     if (rsc->running_on) {
2167                         crm_notice("Active (now-)anonymous clone %s has "
2168                                    "multiple (orphan) instance histories on %s",
2169                                    parent->id, pcmk__node_name(node));
2170                         skip_inactive = TRUE;
2171                         rsc = NULL;
2172                     } else {
2173                         pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2174                     }
2175                 }
2176             }
2177             g_list_free(locations);
2178 
2179         } else {
2180             pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2181             if (!skip_inactive && !inactive_instance
2182                 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2183                 // Remember one inactive instance in case we don't find active
2184                 inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2185                                                           pcmk_rsc_match_clone_only);
2186 
2187                 /* ... but don't use it if it was already associated with a
2188                  * pending action on another node
2189                  */
2190                 if ((inactive_instance != NULL) &&
2191                     (inactive_instance->pending_node != NULL) &&
2192                     !pcmk__same_node(inactive_instance->pending_node, node)) {
2193                     inactive_instance = NULL;
2194                 }
2195             }
2196         }
2197     }
2198 
2199     if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2200         pcmk__rsc_trace(parent, "Resource %s, empty slot",
2201                         inactive_instance->id);
2202         rsc = inactive_instance;
2203     }
2204 
2205     /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2206      * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2207      * don't want to consume a valid instance number for unclean nodes. Such
2208      * instances may appear to be active according to the history, but should be
2209      * considered inactive, so we can start an instance elsewhere. Treat such
2210      * instances as orphans.
2211      *
2212      * An exception is instances running on guest nodes -- since guest node
2213      * "fencing" is actually just a resource stop, requires shouldn't apply.
2214      *
2215      * @TODO Ideally, we'd use an inactive instance number if it is not needed
2216      * for any clean instances. However, we don't know that at this point.
2217      */
2218     if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2219         && (!node->details->online || node->details->unclean)
2220         && !pcmk__is_guest_or_bundle_node(node)
2221         && !pe__is_universal_clone(parent, scheduler)) {
2222 
2223         rsc = NULL;
2224     }
2225 
2226     if (rsc == NULL) {
2227         rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2228         pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2229     }
2230     return rsc;
2231 }
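
     /* Illustrative example (hypothetical names): if anonymous clone
      * "ping-clone" wraps primitive "ping" with PCMK_META_CLONE_MAX=3, and
      * history for "ping" is found on node1, this returns the instance already
      * active on node1 if there is one (say "ping:0"), otherwise an inactive
      * instance (say "ping:1"), otherwise a newly created orphan ("ping:3").
      */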
2232 
2233 static pcmk_resource_t *
2234 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2235                      const char *rsc_id)
2236 {
2237     pcmk_resource_t *rsc = NULL;
2238     pcmk_resource_t *parent = NULL;
2239 
2240     crm_trace("looking for %s", rsc_id);
2241     rsc = pe_find_resource(scheduler->resources, rsc_id);
2242 
2243     if (rsc == NULL) {
2244         /* If we didn't find the resource by its name in the operation history,
2245          * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2246          * we create a single :0 orphan to match against here.
2247          */
2248         char *clone0_id = clone_zero(rsc_id);
2249         pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2250                                                    clone0_id);
2251 
2252         if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2253             rsc = clone0;
2254             parent = uber_parent(clone0);
2255             crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2256         } else {
2257             crm_trace("%s is not known as %s either (orphan)",
2258                       rsc_id, clone0_id);
2259         }
2260         free(clone0_id);
2261 
2262     } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2263         crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2264                   rsc_id);
2265         return NULL;
2266 
2267     } else {
2268         parent = uber_parent(rsc);
2269     }
2270 
2271     if (pcmk__is_anonymous_clone(parent)) {
2272 
2273         if (pcmk__is_bundled(parent)) {
2274             rsc = pe__find_bundle_replica(parent->parent, node);
2275         } else {
2276             char *base = clone_strip(rsc_id);
2277 
2278             rsc = find_anonymous_clone(scheduler, node, parent, base);
2279             free(base);
2280             pcmk__assert(rsc != NULL);
2281         }
2282     }
2283 
2284     if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2285         && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_none)) {
2286 
2287         pcmk__str_update(&rsc->clone_name, rsc_id);
2288         pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2289                         rsc_id, pcmk__node_name(node), rsc->id,
2290                         pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
2291     }
2292     return rsc;
2293 }
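
     /* Illustrative example (hypothetical IDs): a history entry recorded as
      * "galera" will not match a configured anonymous clone directly, but will
      * match its "galera:0" instance; find_anonymous_clone() then picks the
      * appropriate instance for the node, and that instance's clone_name is
      * set to "galera" so later history entries can be matched to it.
      */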
2294 
2295 static pcmk_resource_t *
2296 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2297                         pcmk_scheduler_t *scheduler)
2298 {
2299     pcmk_resource_t *rsc = NULL;
2300     const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2301 
2302     crm_debug("Detected orphan resource %s on %s",
2303               rsc_id, pcmk__node_name(node));
2304     rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2305     if (rsc == NULL) {
2306         return NULL;
2307     }
2308 
2309     if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
2310         pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2311 
2312     } else {
2313         CRM_CHECK(rsc != NULL, return NULL);
2314         pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2315         resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2316                           "__orphan_do_not_run__", scheduler);
2317     }
2318     return rsc;
2319 }
2320 
2321 static void
2322 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2323                   enum action_fail_response on_fail)
2324 {
2325     pcmk_node_t *tmpnode = NULL;
2326     char *reason = NULL;
2327     enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2328 
2329     pcmk__assert(rsc);
2330     pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2331                     rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
2332                     pcmk_on_fail_text(on_fail));
2333 
2334     /* process current state */
2335     if (rsc->role != pcmk_role_unknown) {
2336         pcmk_resource_t *iter = rsc;
2337 
2338         while (iter) {
2339             if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2340                 pcmk_node_t *n = pe__copy_node(node);
2341 
2342                 pcmk__rsc_trace(rsc, "%s%s%s known on %s",
2343                                 rsc->id,
2344                                 ((rsc->clone_name == NULL)? "" : " also known as "),
2345                                 ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2346                                 pcmk__node_name(n));
2347                 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2348             }
2349             if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2350                 break;
2351             }
2352             iter = iter->parent;
2353         }
2354     }
2355 
2356     /* If a managed resource is believed to be running, but node is down ... */
2357     if ((rsc->role > pcmk_role_stopped)
2358         && node->details->online == FALSE
2359         && node->details->maintenance == FALSE
2360         && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2361 
2362         gboolean should_fence = FALSE;
2363 
2364         /* If this is a guest node, fence it (regardless of whether fencing is
2365          * enabled, because guest node fencing is done by recovery of the
2366          * container resource rather than by the fencer). Mark the resource
2367          * we're processing as failed. When the guest comes back up, its
2368          * operation history in the CIB will be cleared, freeing the affected
2369          * resource to run again once we are sure we know its state.
2370          */
2371         if (pcmk__is_guest_or_bundle_node(node)) {
2372             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2373             should_fence = TRUE;
2374 
2375         } else if (pcmk_is_set(rsc->cluster->flags,
2376                                pcmk_sched_fencing_enabled)) {
2377             if (pcmk__is_remote_node(node)
2378                 && (node->details->remote_rsc != NULL)
2379                 && !pcmk_is_set(node->details->remote_rsc->flags,
2380                                 pcmk_rsc_failed)) {
2381 
2382                 /* Setting unseen means that fencing of the remote node will
2383                  * occur only if the connection resource is not going to start
2384                  * somewhere. This allows connection resources on a failed
2385                  * cluster node to move to another node without requiring the
2386                  * remote nodes to be fenced as well.
2387                  */
2388                 node->details->unseen = TRUE;
2389                 reason = crm_strdup_printf("%s is active there (fencing will be"
2390                                            " revoked if remote connection can "
2391                                            "be re-established elsewhere)",
2392                                            rsc->id);
2393             }
2394             should_fence = TRUE;
2395         }
2396 
2397         if (should_fence) {
2398             if (reason == NULL) {
2399                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2400             }
2401             pe_fence_node(rsc->cluster, node, reason, FALSE);
2402         }
2403         free(reason);
2404     }
2405 
2406     /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2407     save_on_fail = on_fail;
2408 
2409     if (node->details->unclean) {
2410         /* No extra processing needed
2411          * Also allows resources to be started again after a node is shot
2412          */
2413         on_fail = pcmk_on_fail_ignore;
2414     }
2415 
2416     switch (on_fail) {
2417         case pcmk_on_fail_ignore:
2418             /* nothing to do */
2419             break;
2420 
2421         case pcmk_on_fail_demote:
2422             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2423             demote_action(rsc, node, FALSE);
2424             break;
2425 
2426         case pcmk_on_fail_fence_node:
2427             /* treat it as if it is still running
2428              * but also mark the node as unclean
2429              */
2430             reason = crm_strdup_printf("%s failed there", rsc->id);
2431             pe_fence_node(rsc->cluster, node, reason, FALSE);
2432             free(reason);
2433             break;
2434 
2435         case pcmk_on_fail_standby_node:
2436             node->details->standby = TRUE;
2437             node->details->standby_onfail = TRUE;
2438             break;
2439 
2440         case pcmk_on_fail_block:
2441             /* is_managed == FALSE will prevent any
2442              * actions being sent for the resource
2443              */
2444             pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2445             pcmk__set_rsc_flags(rsc, pcmk_rsc_blocked);
2446             break;
2447 
2448         case pcmk_on_fail_ban:
2449             /* make sure it comes up somewhere else
2450              * or not at all
2451              */
2452             resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2453                               "__action_migration_auto__", rsc->cluster);
2454             break;
2455 
2456         case pcmk_on_fail_stop:
2457             pe__set_next_role(rsc, pcmk_role_stopped,
2458                               PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2459             break;
2460 
2461         case pcmk_on_fail_restart:
2462             if ((rsc->role != pcmk_role_stopped)
2463                 && (rsc->role != pcmk_role_unknown)) {
2464                 pcmk__set_rsc_flags(rsc,
2465                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2466                 stop_action(rsc, node, FALSE);
2467             }
2468             break;
2469 
2470         case pcmk_on_fail_restart_container:
2471             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2472             if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
2473                 /* A bundle's remote connection can run on a different node than
2474                  * the bundle's container. We don't necessarily know where the
2475                  * container is running yet, so remember it and add a stop
2476                  * action for it later.
2477                  */
2478                 rsc->cluster->stop_needed =
2479                     g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2480             } else if (rsc->container) {
2481                 stop_action(rsc->container, node, FALSE);
2482             } else if ((rsc->role != pcmk_role_stopped)
2483                        && (rsc->role != pcmk_role_unknown)) {
2484                 stop_action(rsc, node, FALSE);
2485             }
2486             break;
2487 
2488         case pcmk_on_fail_reset_remote:
2489             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2490             if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
2491                 tmpnode = NULL;
2492                 if (rsc->is_remote_node) {
2493                     tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2494                 }
2495                 if (pcmk__is_remote_node(tmpnode)
2496                     && !(tmpnode->details->remote_was_fenced)) {
2497                     /* The remote connection resource failed in a way that
2498                      * should result in fencing the remote node.
2499                      */
2500                     pe_fence_node(rsc->cluster, tmpnode,
2501                                   "remote connection is unrecoverable", FALSE);
2502                 }
2503             }
2504 
2505             /* Require the stop action regardless of whether fencing is occurring. */
2506             if (rsc->role > pcmk_role_stopped) {
2507                 stop_action(rsc, node, FALSE);
2508             }
2509 
2510             /* if reconnect delay is in use, prevent the connection from exiting the
2511              * "STOPPED" role until the failure is cleared by the delay timeout. */
2512             if (rsc->remote_reconnect_ms) {
2513                 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2514             }
2515             break;
2516     }
2517 
2518     /* Ensure a remote node connection failure forces an unclean remote node
2519      * to be fenced. By setting unseen = FALSE, the remote node failure will
2520      * result in a fencing operation regardless of whether we're going to
2521      * attempt to reconnect to the remote node in this transition. */
2522     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2523         tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2524         if (tmpnode && tmpnode->details->unclean) {
2525             tmpnode->details->unseen = FALSE;
2526         }
2527     }
2528 
2529     if ((rsc->role != pcmk_role_stopped)
2530         && (rsc->role != pcmk_role_unknown)) {
2531         if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2532             if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2533                 crm_notice("Removed resource %s is active on %s and will be "
2534                            "stopped when possible",
2535                            rsc->id, pcmk__node_name(node));
2536             } else {
2537                 crm_notice("Removed resource %s must be stopped manually on %s "
2538                            "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
2539                            " is set to false", rsc->id, pcmk__node_name(node));
2540             }
2541         }
2542 
2543         native_add_running(rsc, node, rsc->cluster,
2544                            (save_on_fail != pcmk_on_fail_ignore));
2545         switch (on_fail) {
2546             case pcmk_on_fail_ignore:
2547                 break;
2548             case pcmk_on_fail_demote:
2549             case pcmk_on_fail_block:
2550                 pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2551                 break;
2552             default:
2553                 pcmk__set_rsc_flags(rsc,
2554                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2555                 break;
2556         }
2557 
2558     } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2559         /* Only do this for older status sections that included instance numbers.
2560          * Otherwise stopped instances will appear as orphans.
2561          */
2562         pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
2563                         rsc->clone_name, rsc->id);
2564         free(rsc->clone_name);
2565         rsc->clone_name = NULL;
2566 
2567     } else {
2568         GList *possible_matches = pe__resource_actions(rsc, node,
2569                                                        PCMK_ACTION_STOP, FALSE);
2570         GList *gIter = possible_matches;
2571 
2572         for (; gIter != NULL; gIter = gIter->next) {
2573             pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2574 
2575             pcmk__set_action_flags(stop, pcmk_action_optional);
2576         }
2577 
2578         g_list_free(possible_matches);
2579     }
2580 
2581     /* A successful stop after migrate_to on the migration source doesn't make
2582      * the partially migrated resource stopped on the migration target.
2583      */
2584     if ((rsc->role == pcmk_role_stopped)
2585         && rsc->partial_migration_source
2586         && rsc->partial_migration_source->details == node->details
2587         && rsc->partial_migration_target
2588         && rsc->running_on) {
2589 
2590         rsc->role = pcmk_role_started;
2591     }
2592 }
2593 
2594 /* create active recurring operations as optional */
2595 static void
2596 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2597                   int start_index, int stop_index,
2598                   GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2599 {
2600     int counter = -1;
2601     const char *task = NULL;
2602     const char *status = NULL;
2603     GList *gIter = sorted_op_list;
2604 
2605     pcmk__assert(rsc != NULL);
2606     pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2607                     rsc->id, start_index, stop_index);
2608 
2609     for (; gIter != NULL; gIter = gIter->next) {
2610         xmlNode *rsc_op = (xmlNode *) gIter->data;
2611 
2612         guint interval_ms = 0;
2613         char *key = NULL;
2614         const char *id = pcmk__xe_id(rsc_op);
2615 
2616         counter++;
2617 
2618         if (node->details->online == FALSE) {
2619             pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2620                             rsc->id, pcmk__node_name(node));
2621             break;
2622 
2623             /* Need to check if there's a monitor for role="Stopped" */
2624         } else if (start_index < stop_index && counter <= stop_index) {
2625             pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2626                             id, pcmk__node_name(node));
2627             continue;
2628 
2629         } else if (counter < start_index) {
2630             pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2631                             id, pcmk__node_name(node), counter);
2632             continue;
2633         }
2634 
2635         crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2636         if (interval_ms == 0) {
2637             pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2638                             id, pcmk__node_name(node));
2639             continue;
2640         }
2641 
2642         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2643         if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2644             pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2645                             id, pcmk__node_name(node));
2646             continue;
2647         }
2648         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2649         /* create the action */
2650         key = pcmk__op_key(rsc->id, task, interval_ms);
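             /* pcmk__op_key() produces "<rsc-id>_<task>_<interval-ms>", e.g.
              * "myrsc_monitor_10000" for a 10-second monitor (illustrative name).
              */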
2651         pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2652         custom_action(rsc, key, task, node, TRUE, scheduler);
2653     }
2654 }
2655 
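     /* Scan a sorted operation history to find the index of the last completed
      * stop (*stop_index) and of the last start (*start_index). A migrate_from
      * counts as a start; if there is no explicit start, the last promote or
      * demote, or failing that a monitor that found the resource active, is
      * taken as an implied start.
      */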
2656 void
2657 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2658                      int *stop_index)
2659 {
2660     int counter = -1;
2661     int implied_monitor_start = -1;
2662     int implied_clone_start = -1;
2663     const char *task = NULL;
2664     const char *status = NULL;
2665 
2666     *stop_index = -1;
2667     *start_index = -1;
2668 
2669     for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2670         const xmlNode *rsc_op = (const xmlNode *) iter->data;
2671 
2672         counter++;
2673 
2674         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2675         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2676 
2677         if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2678             && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2679             *stop_index = counter;
2680 
2681         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2682                                         PCMK_ACTION_MIGRATE_FROM, NULL)) {
2683             *start_index = counter;
2684 
2685         } else if ((implied_monitor_start <= *stop_index)
2686                    && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2687                                    pcmk__str_casei)) {
2688             const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2689 
2690             if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2691                 implied_monitor_start = counter;
2692             }
2693         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2694                                         PCMK_ACTION_DEMOTE, NULL)) {
2695             implied_clone_start = counter;
2696         }
2697     }
2698 
2699     if (*start_index == -1) {
2700         if (implied_clone_start != -1) {
2701             *start_index = implied_clone_start;
2702         } else if (implied_monitor_start != -1) {
2703             *start_index = implied_monitor_start;
2704         }
2705     }
2706 }
2707 
2708 // If resource history entry has shutdown lock, remember lock node and time
2709 static void
2710 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2711                      const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2712 {
2713     time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
2714 
2715     if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
2716                                  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2717 
2718         if ((scheduler->shutdown_lock > 0)
2719             && (get_effective_time(scheduler)
2720                 > (lock_time + scheduler->shutdown_lock))) {
2721             pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2722                            rsc->id, pcmk__node_name(node));
2723             pe__clear_resource_history(rsc, node);
2724         } else {
2725             /* @COMPAT I don't like breaking const signatures, but
2726              * rsc->lock_node should really be const -- we just can't change it
2727              * until the next API compatibility break.
2728              */
2729             rsc->lock_node = (pcmk_node_t *) node;
2730             rsc->lock_time = lock_time;
2731         }
2732     }
2733 }
2734 
2735 /*!
2736  * \internal
2737  * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
2738  *
2739  * \param[in,out] node       Node whose status is being unpacked
2740  * \param[in]     lrm_resource  \c PCMK__XE_LRM_RESOURCE XML being unpacked
2741  * \param[in,out] scheduler  Scheduler data
2742  *
2743  * \return Resource corresponding to the entry, or NULL if no operation history
2744  */
2745 static pcmk_resource_t *
2746 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2747                     pcmk_scheduler_t *scheduler)
2748 {
2749     GList *gIter = NULL;
2750     int stop_index = -1;
2751     int start_index = -1;
2752     enum rsc_role_e req_role = pcmk_role_unknown;
2753 
2754     const char *rsc_id = pcmk__xe_id(lrm_resource);
2755 
2756     pcmk_resource_t *rsc = NULL;
2757     GList *op_list = NULL;
2758     GList *sorted_op_list = NULL;
2759 
2760     xmlNode *rsc_op = NULL;
2761     xmlNode *last_failure = NULL;
2762 
2763     enum action_fail_response on_fail = pcmk_on_fail_ignore;
2764     enum rsc_role_e saved_role = pcmk_role_unknown;
2765 
2766     if (rsc_id == NULL) {
2767         pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2768                          " entry: No " PCMK_XA_ID);
2769         crm_log_xml_info(lrm_resource, "missing-id");
2770         return NULL;
2771     }
2772     crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2773               rsc_id, pcmk__node_name(node));
2774 
2775     /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2776      * them
2777      */
2778     for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2779                                        NULL);
2780          rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
2781 
2782         op_list = g_list_prepend(op_list, rsc_op);
2783     }
2784 
2785     if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2786         if (op_list == NULL) {
2787             // If there are no operations, there is nothing to do
2788             return NULL;
2789         }
2790     }
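    /* With shutdown locks enabled, don't bail out just because there is no
     * operation history: a shutdown lock recorded on this lrm_resource entry
     * may still need to be unpacked or expired below.
     */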
2791 
2792     /* find the resource */
2793     rsc = unpack_find_resource(scheduler, node, rsc_id);
2794     if (rsc == NULL) {
2795         if (op_list == NULL) {
2796             // If there are no operations, there is nothing to do
2797             return NULL;
2798         } else {
2799             rsc = process_orphan_resource(lrm_resource, node, scheduler);
2800         }
2801     }
2802     pcmk__assert(rsc != NULL);
2803 
2804     // Check whether the resource is "shutdown-locked" to this node
2805     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2806         unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2807     }
2808 
2809     /* process operations */
2810     saved_role = rsc->role;
2811     rsc->role = pcmk_role_unknown;
2812     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
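    // sort_op_by_callid() orders the entries oldest to newest (by call ID)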
2813 
2814     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2815         xmlNode *rsc_op = (xmlNode *) gIter->data;
2816 
2817         unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2818     }
2819 
2820     /* create active recurring operations as optional */
2821     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2822     process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2823                       scheduler);
2824 
2825     /* no need to free the contents */
2826     g_list_free(sorted_op_list);
2827 
2828     process_rsc_state(rsc, node, on_fail);
2829 
2830     if (get_target_role(rsc, &req_role)) {
2831         if ((rsc->next_role == pcmk_role_unknown)
2832             || (req_role < rsc->next_role)) {
2833 
2834             pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2835 
2836         } else if (req_role > rsc->next_role) {
2837             pcmk__rsc_info(rsc,
2838                            "%s: Not overwriting calculated next role %s"
2839                            " with requested next role %s",
2840                            rsc->id, pcmk_role_text(rsc->next_role),
2841                            pcmk_role_text(req_role));
2842         }
2843     }
2844 
2845     if (saved_role > rsc->role) {
2846         rsc->role = saved_role;
2847     }
2848 
2849     return rsc;
2850 }
2851 
2852 static void
2853 handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2854                                   pcmk_scheduler_t *scheduler)
2855 {
2856     for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
2857                                                          NULL, NULL);
2858          rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2859 
2860         pcmk_resource_t *rsc;
2861         pcmk_resource_t *container;
2862         const char *rsc_id;
2863         const char *container_id;
2864 
2865         if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2866             continue;
2867         }
2868 
2869         container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2870         rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2871         if (container_id == NULL || rsc_id == NULL) {
2872             continue;
2873         }
2874 
2875         container = pe_find_resource(scheduler->resources, container_id);
2876         if (container == NULL) {
2877             continue;
2878         }
2879 
2880         rsc = pe_find_resource(scheduler->resources, rsc_id);
2881         if ((rsc == NULL) || (rsc->container != NULL)
2882             || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2883             continue;
2884         }
2885 
2886         pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2887                         rsc->id, container_id);
2888         rsc->container = container;
2889         container->fillers = g_list_append(container->fillers, rsc);
2890     }
2891 }
2892 
2893 /*!
2894  * \internal
2895  * \brief Unpack one node's lrm status section
2896  *
2897  * \param[in,out] node       Node whose status is being unpacked
2898  * \param[in]     xml        CIB node state XML
2899  * \param[in,out] scheduler  Scheduler data
2900  */
2901 static void
2902 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2903                 pcmk_scheduler_t *scheduler)
2904 {
2905     bool found_orphaned_container_filler = false;
2906 
2907     // Drill down to PCMK__XE_LRM_RESOURCES section
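    /* The relevant part of the node's status XML looks roughly like this
     * (illustrative; most attributes omitted):
     *   <lrm id="node1">
     *     <lrm_resources>
     *       <lrm_resource id="my-rsc" ...>
     *         <lrm_rsc_op id="my-rsc_last_0" operation="start" .../>
     *       </lrm_resource>
     *     </lrm_resources>
     *   </lrm>
     */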
2908     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2909     if (xml == NULL) {
2910         return;
2911     }
2912     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2913     if (xml == NULL) {
2914         return;
2915     }
2916 
2917     // Unpack each PCMK__XE_LRM_RESOURCE entry
2918     for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2919                                                          PCMK__XE_LRM_RESOURCE,
2920                                                          NULL, NULL);
2921          rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
2922 
2923         pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2924 
2925         if ((rsc != NULL)
2926             && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2927             found_orphaned_container_filler = true;
2928         }
2929     }
2930 
2931     /* Now that all resource state has been unpacked for this node, map any
2932      * orphaned container fillers to their container resource.
2933      */
2934     if (found_orphaned_container_filler) {
2935         handle_orphaned_container_fillers(xml, scheduler);
2936     }
2937 }
2938 
2939 static void
2940 set_active(pcmk_resource_t *rsc)
2941 {
2942     const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2943 
2944     if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2945         rsc->role = pcmk_role_unpromoted;
2946     } else {
2947         rsc->role = pcmk_role_started;
2948     }
2949 }
2950 
2951 static void
2952 set_node_score(gpointer key, gpointer value, gpointer user_data)
2953 {
2954     pcmk_node_t *node = value;
2955     int *score = user_data;
2956 
2957     node->weight = *score;
2958 }
2959 
2960 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2961                          "/" PCMK__XE_NODE_STATE
2962 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM             \
2963                                "/" PCMK__XE_LRM_RESOURCES   \
2964                                "/" PCMK__XE_LRM_RESOURCE
2965 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
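     /* Together these select entries such as (illustrative):
      *   /cib/status/node_state[@uname='node1']
      *       /lrm/lrm_resources/lrm_resource[@id='my-rsc']
      *           /lrm_rsc_op[@operation='monitor']
      */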
2966 
2967 static xmlNode *
2968 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2969             int target_rc, pcmk_scheduler_t *scheduler)
2970 {
2971     GString *xpath = NULL;
2972     xmlNode *xml = NULL;
2973 
2974     CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2975               return NULL);
2976 
2977     xpath = g_string_sized_new(256);
2978     pcmk__g_strcat(xpath,
2979                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2980                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2981                    SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2982                    NULL);
2983 
2984     /* Need to check against transition_magic too? */
2985     if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2986         pcmk__g_strcat(xpath,
2987                        " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2988                        NULL);
2989 
2990     } else if ((source != NULL)
2991                && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2992         pcmk__g_strcat(xpath,
2993                        " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2994                        NULL);
2995     } else {
2996         g_string_append_c(xpath, ']');
2997     }
2998 
2999     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
3000                            LOG_DEBUG);
3001     g_string_free(xpath, TRUE);
3002 
3003     if (xml && target_rc >= 0) {
3004         int rc = PCMK_OCF_UNKNOWN_ERROR;
3005         int status = PCMK_EXEC_ERROR;
3006 
3007         crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
3008         crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
3009         if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
3010             return NULL;
3011         }
3012     }
3013     return xml;
3014 }
3015 
3016 static xmlNode *
3017 find_lrm_resource(const char *rsc_id, const char *node_name,
3018                   pcmk_scheduler_t *scheduler)
3019 {
3020     GString *xpath = NULL;
3021     xmlNode *xml = NULL;
3022 
3023     CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
3024 
3025     xpath = g_string_sized_new(256);
3026     pcmk__g_strcat(xpath,
3027                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
3028                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
3029                    NULL);
3030 
3031     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
3032                            LOG_DEBUG);
3033 
3034     g_string_free(xpath, TRUE);
3035     return xml;
3036 }
3037 
3038 /*!
3039  * \internal
3040  * \brief Check whether a resource has no completed action history on a node
3041  *
3042  * \param[in,out] rsc        Resource to check
3043  * \param[in]     node_name  Node to check
3044  *
3045  * \return true if \p rsc is unknown on \p node_name, otherwise false
3046  */
3047 static bool
3048 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3049 {
3050     bool result = false;
3051     xmlXPathObjectPtr search;
3052     char *xpath = NULL;
3053 
3054     xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3055                               SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
3056                               SUB_XPATH_LRM_RSC_OP
3057                               "[@" PCMK__XA_RC_CODE "!='%d']",
3058                               node_name, rsc->id, PCMK_OCF_UNKNOWN);
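    /* Pending operations are recorded with an rc-code of PCMK_OCF_UNKNOWN, so
     * any entry matched here should be a completed result.
     */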
3059 
3060     search = xpath_search(rsc->cluster->input, xpath);
3061     result = (numXpathResults(search) == 0);
3062     freeXpathObject(search);
3063     free(xpath);
3064     return result;
3065 }
3066 
3067 /*!
3068  * \brief Check whether a probe/monitor indicating the resource was not running
3069  * on a node happened after some event
3070  *
3071  * \param[in]     rsc_id     Resource being checked
3072  * \param[in]     node_name  Node being checked
3073  * \param[in]     xml_op     Event that monitor is being compared to
3074  * \param[in]     same_node  Whether the operations are on the same node
3075  * \param[in,out] scheduler  Scheduler data
3076  *
3077  * \return true if such a monitor happened after the event, false otherwise
3078  */
3079 static bool
3080 monitor_not_running_after(const char *rsc_id, const char *node_name,
3081                           const xmlNode *xml_op, bool same_node,
3082                           pcmk_scheduler_t *scheduler)
3083 {
3084     /* Any probe/monitor operation on the node indicating it was not running
3085      * there
3086      */
3087     xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3088                                    NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3089 
3090     return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
3091 }
3092 
3093 /*!
3094  * \brief Check whether any non-monitor operation on a node happened after some
3095  * event
3096  *
3097  * \param[in]     rsc_id     Resource being checked
3098  * \param[in]     node_name  Node being checked
3099  * \param[in]     xml_op     Event that non-monitor is being compared to
3100  * \param[in]     same_node  Whether the operations are on the same node
3101  * \param[in,out] scheduler  Scheduler data
3102  *
3103  * \return true if such an operation happened after the event, false otherwise
3104  */
3105 static bool
3106 non_monitor_after(const char *rsc_id, const char *node_name,
3107                   const xmlNode *xml_op, bool same_node,
3108                   pcmk_scheduler_t *scheduler)
3109 {
3110     xmlNode *lrm_resource = NULL;
3111 
3112     lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3113     if (lrm_resource == NULL) {
3114         return false;
3115     }
3116 
3117     for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3118                                             NULL, NULL);
3119          op != NULL; op = pcmk__xe_next_same(op)) {
3120 
3121         const char * task = NULL;
3122 
3123         if (op == xml_op) {
3124             continue;
3125         }
3126 
3127         task = crm_element_value(op, PCMK_XA_OPERATION);
3128 
3129         if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3130                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3131                              NULL)
3132             && pe__is_newer_op(op, xml_op, same_node) > 0) {
3133             return true;
3134         }
3135     }
3136 
3137     return false;
3138 }
3139 
3140 /*!
3141  * \brief Check whether the resource has newer state on a node after a migration
3142  * attempt
3143  *
3144  * \param[in]     rsc_id        Resource being checked
3145  * \param[in]     node_name     Node being checked
3146  * \param[in]     migrate_to    Any migrate_to event that is being compared to
3147  * \param[in]     migrate_from  Any migrate_from event that is being compared to
3148  * \param[in,out] scheduler     Scheduler data
3149  *
3150  * \return true if the resource has newer state on the node, false otherwise
3151  */
3152 static bool
3153 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3154                           const xmlNode *migrate_to,
3155                           const xmlNode *migrate_from,
3156                           pcmk_scheduler_t *scheduler)
3157 {
3158     const xmlNode *xml_op = migrate_to;
3159     const char *source = NULL;
3160     const char *target = NULL;
3161     bool same_node = false;
3162 
3163     if (migrate_from) {
3164         xml_op = migrate_from;
3165     }
3166 
3167     source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
3168     target = crm_element_value(xml_op, PCMK__META_MIGRATE_TARGET);
3169 
3170     /* Prefer to compare against the migration event on the same node, if one
3171      * exists, since call IDs are more reliable there.
3172      */
3173     if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3174         if (migrate_from) {
3175            xml_op = migrate_from;
3176            same_node = true;
3177 
3178         } else {
3179            xml_op = migrate_to;
3180         }
3181 
3182     } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3183         if (migrate_to) {
3184            xml_op = migrate_to;
3185            same_node = true;
3186 
3187         } else {
3188            xml_op = migrate_from;
3189         }
3190     }
3191 
3192     /* If there's any newer non-monitor operation on the node, or any newer
3193      * probe/monitor operation on the node indicating it was not running there,
3194      * the migration events potentially no longer matter for the node.
3195      */
3196     return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3197            || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3198                                         scheduler);
3199 }
3200 
3201 /*!
3202  * \internal
3203  * \brief Parse migration source and target node names from history entry
3204  *
3205  * \param[in]  entry        Resource history entry for a migration action
3206  * \param[in]  source_node  If not NULL, source must match this node
3207  * \param[in]  target_node  If not NULL, target must match this node
3208  * \param[out] source_name  Where to store migration source node name
3209  * \param[out] target_name  Where to store migration target node name
3210  *
3211  * \return Standard Pacemaker return code
3212  */
3213 static int
3214 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3215                          const pcmk_node_t *target_node,
3216                          const char **source_name, const char **target_name)
3217 {
3218     *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3219     *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3220     if ((*source_name == NULL) || (*target_name == NULL)) {
3221         pcmk__config_err("Ignoring resource history entry %s without "
3222                          PCMK__META_MIGRATE_SOURCE " and "
3223                          PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3224         return pcmk_rc_unpack_error;
3225     }
3226 
3227     if ((source_node != NULL)
3228         && !pcmk__str_eq(*source_name, source_node->details->uname,
3229                          pcmk__str_casei|pcmk__str_null_matches)) {
3230         pcmk__config_err("Ignoring resource history entry %s because "
3231                          PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3232                          pcmk__xe_id(entry), *source_name,
3233                          pcmk__node_name(source_node));
3234         return pcmk_rc_unpack_error;
3235     }
3236 
3237     if ((target_node != NULL)
3238         && !pcmk__str_eq(*target_name, target_node->details->uname,
3239                          pcmk__str_casei|pcmk__str_null_matches)) {
3240         pcmk__config_err("Ignoring resource history entry %s because "
3241                          PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3242                          pcmk__xe_id(entry), *target_name,
3243                          pcmk__node_name(target_node));
3244         return pcmk_rc_unpack_error;
3245     }
3246 
3247     return pcmk_rc_ok;
3248 }
3249 
3250 /*!
3251  * \internal
3252  * \brief Add a migration source to a resource's list of dangling migrations
3253  *
3254  * If the migrate_to and migrate_from actions in a live migration both
3255  * succeeded, but there is no stop on the source, the migration is considered
3256  * "dangling." Add the source to the resource's dangling migration list, which
3257  * will be used to schedule a stop on the source without affecting the target.
3258  *
3259  * \param[in,out] rsc   Resource involved in migration
3260  * \param[in]     node  Migration source
3261  */
3262 static void
3263 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3264 {
3265     pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3266                     rsc->id, pcmk__node_name(node));
3267     rsc->role = pcmk_role_stopped;
3268     rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3269                                               (gpointer) node);
3270 }
3271 
3272 /*!
3273  * \internal
3274  * \brief Update resource role etc. after a successful migrate_to action
3275  *
3276  * \param[in,out] history  Parsed action result history
3277  */
3278 static void
3279 unpack_migrate_to_success(struct action_history *history)
3280 {
3281     /* A complete migration sequence is:
3282      * 1. migrate_to on source node (which succeeded if we get to this function)
3283      * 2. migrate_from on target node
3284      * 3. stop on source node
3285      *
3286      * If no migrate_from has happened, the migration is considered to be
3287      * "partial". If the migrate_from succeeded but no stop has happened, the
3288      * migration is considered to be "dangling".
3289      *
3290      * If a successful migrate_to and stop have happened on the source node, we
3291      * still need to check for a partial migration, due to scenarios (easier to
3292      * produce with batch-limit=1) like:
3293      *
3294      * - A resource is migrating from node1 to node2, and a migrate_to is
3295      *   initiated for it on node1.
3296      *
3297      * - node2 goes into standby mode while the migrate_to is pending, which
3298      *   aborts the transition.
3299      *
3300      * - Upon completion of the migrate_to, a new transition schedules a stop
3301      *   on both nodes and a start on node1.
3302      *
3303      * - If the new transition is aborted for any reason while the resource is
3304      *   stopping on node1, the transition after that stop completes will see
3305      *   the migrate_to and stop on the source, but it's still a partial
3306      *   migration, and the resource must be stopped on node2 because it is
3307      *   potentially active there due to the migrate_to.
3308      *
3309      *   We also need to take into account that either node's history may be
3310      *   cleared at any point in the migration process.
3311      */
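    /* Roughly, the cases handled below: a successful migrate_from with no later
     * stop is a dangling migration; a failed migrate_from leaves the resource
     * active on the target if it is online and otherwise in need of recovery;
     * and a pending or missing migrate_from with both nodes online is treated
     * as a partial migration that may be continued.
     */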
3312     int from_rc = PCMK_OCF_OK;
3313     int from_status = PCMK_EXEC_PENDING;
3314     pcmk_node_t *target_node = NULL;
3315     xmlNode *migrate_from = NULL;
3316     const char *source = NULL;
3317     const char *target = NULL;
3318     bool source_newer_op = false;
3319     bool target_newer_state = false;
3320     bool active_on_target = false;
3321 
3322     // Get source and target node names from XML
3323     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3324                                  &target) != pcmk_rc_ok) {
3325         return;
3326     }
3327 
3328     // Check for newer state on the source
3329     source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3330                                         true, history->rsc->cluster);
3331 
3332     // Check for a migrate_from action from this source on the target
3333     migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3334                                target, source, -1, history->rsc->cluster);
3335     if (migrate_from != NULL) {
3336         if (source_newer_op) {
3337             /* There's a newer non-monitor operation on the source and a
3338              * migrate_from on the target, so this migrate_to is irrelevant to
3339              * the resource's state.
3340              */
3341             return;
3342         }
3343         crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3344         crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3345     }
3346 
3347     /* If the resource has newer state on both the source and target after the
3348      * migration events, this migrate_to is irrelevant to the resource's state.
3349      */
3350     target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3351                                                    history->xml, migrate_from,
3352                                                    history->rsc->cluster);
3353     if (source_newer_op && target_newer_state) {
3354         return;
3355     }
3356 
3357     /* Check for dangling migration (migrate_from succeeded but stop not done).
3358      * We know there's no stop because we already returned if the target has a
3359      * migrate_from and the source has any newer non-monitor operation.
3360      */
3361     if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3362         add_dangling_migration(history->rsc, history->node);
3363         return;
3364     }
3365 
3366     /* Without newer state, this migrate_to implies the resource is active.
3367      * (Clones are not allowed to migrate, so role can't be promoted.)
3368      */
3369     history->rsc->role = pcmk_role_started;
3370 
3371     target_node = pcmk_find_node(history->rsc->cluster, target);
3372     active_on_target = !target_newer_state && (target_node != NULL)
3373                        && target_node->details->online;
3374 
3375     if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3376         if (active_on_target) {
3377             native_add_running(history->rsc, target_node, history->rsc->cluster,
3378                                TRUE);
3379         } else {
3380             // Mark resource as failed, require recovery, and prevent migration
3381             pcmk__set_rsc_flags(history->rsc,
3382                                 pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3383             pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
3384         }
3385         return;
3386     }
3387 
3388     // The migrate_from is pending, complete but erased, or to be scheduled
3389 
3390     /* If there is no history at all for the resource on an online target, then
3391      * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3392      * have the probe result, it will be reflected in target_newer_state.
3393      */
3394     if ((target_node != NULL) && target_node->details->online
3395         && unknown_on_node(history->rsc, target)) {
3396         return;
3397     }
3398 
3399     if (active_on_target) {
3400         pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3401                                                   source);
3402 
3403         native_add_running(history->rsc, target_node, history->rsc->cluster,
3404                            FALSE);
3405         if ((source_node != NULL) && source_node->details->online) {
3406             /* This is a partial migration: the migrate_to completed
3407              * successfully on the source, but the migrate_from has not
3408              * completed. Remember the source and target; if the newly
3409              * chosen target remains the same when we schedule actions
3410              * later, we may continue with the migration.
3411              */
3412             history->rsc->partial_migration_target = target_node;
3413             history->rsc->partial_migration_source = source_node;
3414         }
3415 
3416     } else if (!source_newer_op) {
3417         // Mark resource as failed, require recovery, and prevent migration
3418         pcmk__set_rsc_flags(history->rsc,
3419                             pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3420         pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
3421     }
3422 }
3423 
3424 /*!
3425  * \internal
3426  * \brief Update resource role etc. after a failed migrate_to action
3427  *
3428  * \param[in,out] history  Parsed action result history
3429  */
3430 static void
3431 unpack_migrate_to_failure(struct action_history *history)
3432 {
3433     xmlNode *target_migrate_from = NULL;
3434     const char *source = NULL;
3435     const char *target = NULL;
3436 
3437     // Get source and target node names from XML
3438     if (get_migration_node_names(history->xml, history->node, NULL, &source,
3439                                  &target) != pcmk_rc_ok) {
3440         return;
3441     }
3442 
3443     /* If a migration failed, we have to assume the resource is active. Clones
3444      * are not allowed to migrate, so role can't be promoted.
3445      */
3446     history->rsc->role = pcmk_role_started;
3447 
3448     // Check for migrate_from on the target
3449     target_migrate_from = find_lrm_op(history->rsc->id,
3450                                       PCMK_ACTION_MIGRATE_FROM, target, source,
3451                                       PCMK_OCF_OK, history->rsc->cluster);
3452 
3453     if (/* If the resource state is unknown on the target, it will likely be
3454          * probed there.
3455          * Don't just consider it running there. We will get back here anyway in
3456          * case the probe detects it's running there.
3457          */
3458         !unknown_on_node(history->rsc, target)
3459         /* If the resource has newer state on the target after the migration
3460          * events, this migrate_to no longer matters for the target.
3461          */
3462         && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3463                                       target_migrate_from,
3464                                       history->rsc->cluster)) {
3465         /* The resource has no newer state on the target, so assume it is
3466          * still active there, provided the target node is up (checked just
3467          * below).
3468          */
3469         pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
3470                                                   target);
3471 
3472         if (target_node && target_node->details->online) {
3473             native_add_running(history->rsc, target_node, history->rsc->cluster,
3474                                FALSE);
3475         }
3476 
3477     } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3478                                   history->rsc->cluster)) {
3479         /* We know the resource has newer state on the target, but this
3480          * migrate_to still matters for the source as long as there's no newer
3481          * non-monitor operation there.
3482          */
3483 
3484         // Mark node as having dangling migration so we can force a stop later
3485         history->rsc->dangling_migrations =
3486             g_list_prepend(history->rsc->dangling_migrations,
3487                            (gpointer) history->node);
3488     }
3489 }
3490 
3491 /*!
3492  * \internal
3493  * \brief Update resource role etc. after a failed migrate_from action
3494  *
3495  * \param[in,out] history  Parsed action result history
3496  */
3497 static void
3498 unpack_migrate_from_failure(struct action_history *history)
3499 {
3500     xmlNode *source_migrate_to = NULL;
3501     const char *source = NULL;
3502     const char *target = NULL;
3503 
3504     // Get source and target node names from XML
3505     if (get_migration_node_names(history->xml, NULL, history->node, &source,
3506                                  &target) != pcmk_rc_ok) {
3507         return;
3508     }
3509 
3510     /* If a migration failed, we have to assume the resource is active. Clones
3511      * are not allowed to migrate, so role can't be promoted.
3512      */
3513     history->rsc->role = pcmk_role_started;
3514 
3515     // Check for a migrate_to on the source
3516     source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3517                                     source, target, PCMK_OCF_OK,
3518                                     history->rsc->cluster);
3519 
3520     if (/* If the resource state is unknown on the source, it will likely be
3521          * probed there.
3522          * Don't just consider it running there. We will get back here anyway in
3523          * case the probe detects it's running there.
3524          */
3525         !unknown_on_node(history->rsc, source)
3526         /* If the resource has newer state on the source after the migration
3527          * events, this migrate_from no longer matters for the source.
3528          */
3529         && !newer_state_after_migrate(history->rsc->id, source,
3530                                       source_migrate_to, history->xml,
3531                                       history->rsc->cluster)) {
3532         /* The resource has no newer state on the source, so assume it's still
3533          * active there (if it is up).
3534          */
3535         pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3536                                                   source);
3537 
3538         if (source_node && source_node->details->online) {
3539             native_add_running(history->rsc, source_node, history->rsc->cluster,
3540                                TRUE);
3541         }
3542     }
3543 }
3544 
3545 /*!
3546  * \internal
3547  * \brief Add an action to cluster's list of failed actions
3548  *
3549  * \param[in,out] history  Parsed action result history
3550  */
3551 static void
3552 record_failed_op(struct action_history *history)
3553 {
3554     if (!(history->node->details->online)) {
3555         return;
3556     }
3557 
3558     for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3559          xIter != NULL; xIter = xIter->next) {
3560 
3561         const char *key = pcmk__xe_history_key(xIter);
3562         const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3563 
3564         if (pcmk__str_eq(history->key, key, pcmk__str_none)
3565             && pcmk__str_eq(uname, history->node->details->uname,
3566                             pcmk__str_casei)) {
3567             crm_trace("Skipping duplicate entry %s on %s",
3568                       history->key, pcmk__node_name(history->node));
3569             return;
3570         }
3571     }
3572 
3573     crm_trace("Adding entry for %s on %s to failed action list",
3574               history->key, pcmk__node_name(history->node));
3575     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
3576     crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3577     pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
3578 }
3579 
3580 static char *
3581 last_change_str(const xmlNode *xml_op)
3582 {
3583     time_t when;
3584     char *result = NULL;
3585 
3586     if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
3587                                 &when) == pcmk_ok) {
3588         char *when_s = pcmk__epoch2str(&when, 0);
3589         const char *p = strchr(when_s, ' ');
3590 
3591         // Skip day of week to make message shorter
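        // e.g. "Wed Jan  1 12:00:00 2024" -> "Jan  1 12:00:00 2024"
        // (illustrative; the exact format comes from pcmk__epoch2str())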
3592         if ((p != NULL) && (*(++p) != '\0')) {
3593             result = pcmk__str_copy(p);
3594         }
3595         free(when_s);
3596     }
3597 
3598     if (result == NULL) {
3599         result = pcmk__str_copy("unknown_time");
3600     }
3601 
3602     return result;
3603 }
3604 
3605 /*!
3606  * \internal
3607  * \brief Compare two on-fail values
3608  *
3609  * \param[in] first   One on-fail value to compare
3610  * \param[in] second  The other on-fail value to compare
3611  *
3612  * \return A negative number if second is more severe than first, zero if they
3613  *         are equal, or a positive number if first is more severe than second.
3614  * \note This is only needed until the action_fail_response values can be
3615  *       renumbered at the next API compatibility break.
3616  */
3617 static int
3618 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3619 {
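    /* Special cases: demote ranks just above ignore, reset_remote ranks above
     * restart, and restart_container ranks above reset_remote; all other pairs
     * fall back to comparing the enum values directly.
     */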
3620     switch (first) {
3621         case pcmk_on_fail_demote:
3622             switch (second) {
3623                 case pcmk_on_fail_ignore:
3624                     return 1;
3625                 case pcmk_on_fail_demote:
3626                     return 0;
3627                 default:
3628                     return -1;
3629             }
3630             break;
3631 
3632         case pcmk_on_fail_reset_remote:
3633             switch (second) {
3634                 case pcmk_on_fail_ignore:
3635                 case pcmk_on_fail_demote:
3636                 case pcmk_on_fail_restart:
3637                     return 1;
3638                 case pcmk_on_fail_reset_remote:
3639                     return 0;
3640                 default:
3641                     return -1;
3642             }
3643             break;
3644 
3645         case pcmk_on_fail_restart_container:
3646             switch (second) {
3647                 case pcmk_on_fail_ignore:
3648                 case pcmk_on_fail_demote:
3649                 case pcmk_on_fail_restart:
3650                 case pcmk_on_fail_reset_remote:
3651                     return 1;
3652                 case pcmk_on_fail_restart_container:
3653                     return 0;
3654                 default:
3655                     return -1;
3656             }
3657             break;
3658 
3659         default:
3660             break;
3661     }
3662     switch (second) {
3663         case pcmk_on_fail_demote:
3664             return (first == pcmk_on_fail_ignore)? -1 : 1;
3665 
3666         case pcmk_on_fail_reset_remote:
3667             switch (first) {
3668                 case pcmk_on_fail_ignore:
3669                 case pcmk_on_fail_demote:
3670                 case pcmk_on_fail_restart:
3671                     return -1;
3672                 default:
3673                     return 1;
3674             }
3675             break;
3676 
3677         case pcmk_on_fail_restart_container:
3678             switch (first) {
3679                 case pcmk_on_fail_ignore:
3680                 case pcmk_on_fail_demote:
3681                 case pcmk_on_fail_restart:
3682                 case pcmk_on_fail_reset_remote:
3683                     return -1;
3684                 default:
3685                     return 1;
3686             }
3687             break;
3688 
3689         default:
3690             break;
3691     }
3692     return first - second;
3693 }
3694 
3695 /*!
3696  * \internal
3697  * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
3698  *
3699  * \param[in,out] rsc  Resource to ban
3700  */
3701 static void
3702 ban_from_all_nodes(pcmk_resource_t *rsc)
3703 {
3704     int score = -PCMK_SCORE_INFINITY;
3705     pcmk_resource_t *fail_rsc = rsc;
3706 
3707     if (fail_rsc->parent != NULL) {
3708         pcmk_resource_t *parent = uber_parent(fail_rsc);
3709 
3710         if (pcmk__is_anonymous_clone(parent)) {
3711             /* For anonymous clones, if an operation with
3712              * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3713              * entire clone must stop.
3714              */
3715             fail_rsc = parent;
3716         }
3717     }
3718 
3719     // Ban the resource from all nodes
3720     crm_notice("%s will not be started under current conditions", fail_rsc->id);
3721     if (fail_rsc->allowed_nodes != NULL) {
3722         g_hash_table_destroy(fail_rsc->allowed_nodes);
3723     }
3724     fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3725     g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3726 }
3727 
3728 /*!
3729  * \internal
3730  * \brief Get configured failure handling and role after failure for an action
3731  *
3732  * \param[in,out] history    Unpacked action history entry
3733  * \param[out]    on_fail    Where to set configured failure handling
3734  * \param[out]    fail_role  Where to set role after failure
3735  */
3736 static void
3737 unpack_failure_handling(struct action_history *history,
3738                         enum action_fail_response *on_fail,
3739                         enum rsc_role_e *fail_role)
3740 {
3741     xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3742                                                history->interval_ms, true);
3743 
3744     GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3745                                                 history->task,
3746                                                 history->interval_ms, config);
3747 
3748     const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3749 
3750     *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3751                                    history->interval_ms, on_fail_str);
3752     *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3753                                           meta);
3754     g_hash_table_destroy(meta);
3755 }
3756 
3757 /*!
3758  * \internal
3759  * \brief Update resource role, failure handling, etc., after a failed action
3760  *
3761  * \param[in,out] history         Parsed action result history
3762  * \param[in]     config_on_fail  Action failure handling from configuration
3763  * \param[in]     fail_role       Resource's role after failure of this action
3764  * \param[out]    last_failure    This will be set to the history XML
3765  * \param[in,out] on_fail         Actual handling of action result
3766  */
3767 static void
3768 unpack_rsc_op_failure(struct action_history *history,
3769                       enum action_fail_response config_on_fail,
3770                       enum rsc_role_e fail_role, xmlNode **last_failure,
3771                       enum action_fail_response *on_fail)
3772 {
3773     bool is_probe = false;
3774     char *last_change_s = NULL;
3775 
3776     *last_failure = history->xml;
3777 
3778     is_probe = pcmk_xe_is_probe(history->xml);
3779     last_change_s = last_change_str(history->xml);
3780 
3781     if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3782         && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3783         crm_trace("Unexpected result (%s%s%s) was recorded for "
3784                   "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3785                   services_ocf_exitcode_str(history->exit_status),
3786                   (pcmk__str_empty(history->exit_reason)? "" : ": "),
3787                   pcmk__s(history->exit_reason, ""),
3788                   (is_probe? "probe" : history->task), history->rsc->id,
3789                   pcmk__node_name(history->node), last_change_s,
3790                   history->exit_status, history->id);
3791     } else {
3792         pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
3793                          "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3794                          services_ocf_exitcode_str(history->exit_status),
3795                          (pcmk__str_empty(history->exit_reason)? "" : ": "),
3796                          pcmk__s(history->exit_reason, ""),
3797                          (is_probe? "probe" : history->task), history->rsc->id,
3798                          pcmk__node_name(history->node), last_change_s,
3799                          history->exit_status, history->id);
3800 
3801         if (is_probe && (history->exit_status != PCMK_OCF_OK)
3802             && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3803             && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3804 
3805             /* A failed (not just unexpected) probe result could mean the user
3806              * didn't know resources will be probed even where they can't run.
3807              */
3808             crm_notice("If it is not possible for %s to run on %s, see "
3809                        "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3810                        "constraints",
3811                        history->rsc->id, pcmk__node_name(history->node));
3812         }
3813 
3814         record_failed_op(history);
3815     }
3816 
3817     free(last_change_s);
3818 
3819     if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3820         pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3821                         pcmk_on_fail_text(*on_fail),
3822                         pcmk_on_fail_text(config_on_fail), history->key);
3823         *on_fail = config_on_fail;
3824     }
3825 
3826     if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3827         resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3828                           "__stop_fail__", history->rsc->cluster);
3829 
3830     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3831         unpack_migrate_to_failure(history);
3832 
3833     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3834         unpack_migrate_from_failure(history);
3835 
3836     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3837         history->rsc->role = pcmk_role_promoted;
3838 
3839     } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3840         if (config_on_fail == pcmk_on_fail_block) {
3841             history->rsc->role = pcmk_role_promoted;
3842             pe__set_next_role(history->rsc, pcmk_role_stopped,
3843                               "demote with " PCMK_META_ON_FAIL "=block");
3844 
3845         } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3846             history->rsc->role = pcmk_role_stopped;
3847 
3848         } else {
3849             /* Staying in the promoted role would put the scheduler and
3850              * controller into a loop. Setting the role to unpromoted is not
3851              * dangerous because the resource will be stopped as part of
3852              * recovery, and any promotion will be ordered after that stop.
3853              */
3854             history->rsc->role = pcmk_role_unpromoted;
3855         }
3856     }
3857 
3858     if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3859         /* leave stopped */
3860         pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3861         history->rsc->role = pcmk_role_stopped;
3862 
3863     } else if (history->rsc->role < pcmk_role_started) {
3864         pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3865         set_active(history->rsc);
3866     }
3867 
3868     pcmk__rsc_trace(history->rsc,
3869                     "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3870                     history->rsc->id, pcmk_role_text(history->rsc->role),
3871                     pcmk__btoa(history->node->details->unclean),
3872                     pcmk_on_fail_text(config_on_fail),
3873                     pcmk_role_text(fail_role));
3874 
3875     if ((fail_role != pcmk_role_started)
3876         && (history->rsc->next_role < fail_role)) {
3877         pe__set_next_role(history->rsc, fail_role, "failure");
3878     }
3879 
3880     if (fail_role == pcmk_role_stopped) {
3881         ban_from_all_nodes(history->rsc);
3882     }
3883 }
3884 
3885 /*!
3886  * \internal
3887  * \brief Block a resource with a failed action if it cannot be recovered
3888  *
3889  * If the action is a failed stop and fencing is not possible, mark the
3890  * resource as unmanaged and blocked, since recovery cannot be done.
3891  *
3892  * \param[in,out] history  Parsed action history entry
3893  */
3894 static void
3895 block_if_unrecoverable(struct action_history *history)
3896 {
3897     char *last_change_s = NULL;
3898 
3899     if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3900         return; // All actions besides stop are always recoverable
3901     }
3902     if (pe_can_fence(history->node->details->data_set, history->node)) {
3903         return; // Failed stops are recoverable via fencing
3904     }
3905 
3906     last_change_s = last_change_str(history->xml);
3907     pcmk__sched_err("No further recovery can be attempted for %s "
3908                     "because %s on %s failed (%s%s%s) at %s "
3909                     CRM_XS " rc=%d id=%s",
3910                     history->rsc->id, history->task,
3911                     pcmk__node_name(history->node),
3912                     services_ocf_exitcode_str(history->exit_status),
3913                     (pcmk__str_empty(history->exit_reason)? "" : ": "),
3914                     pcmk__s(history->exit_reason, ""),
3915                     last_change_s, history->exit_status, history->id);
3916 
3917     free(last_change_s);
3918 
3919     pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_managed);
3920     pcmk__set_rsc_flags(history->rsc, pcmk_rsc_blocked);
3921 }
3922 
3923 /*!
3924  * \internal
3925  * \brief Update action history's execution status and why
3926  *
3927  * \param[in,out] history  Parsed action history entry
3928  * \param[out]    why      Where to store reason for update
3929  * \param[in]     value    New value
3930  * \param[in]     reason   Description of why value was changed
3931  */
3932 static inline void
3933 remap_because(struct action_history *history, const char **why, int value,
3934               const char *reason)
3935 {
3936     if (history->execution_status != value) {
3937         history->execution_status = value;
3938         *why = reason;
3939     }
3940 }
3941 
3942 /*!
3943  * \internal
3944  * \brief Remap informational monitor results and operation status
3945  *
3946  * For monitor results, certain OCF codes exist to give the user extended
3947  * information about services that are not failed but not entirely healthy
3948  * either. Pacemaker must treat these as the "normal" (successful) result.
3949  *
3950  * For operation status, the action result can be used to determine a more
3951  * appropriate status for responding to the action, because the executor does
3952  * not know what result was expected.
3953  *
3954  * \param[in,out] history  Parsed action history entry
3955  * \param[in,out] on_fail  What should be done about the result
3956  * \param[in]     expired  Whether result is expired
3957  *
3958  * \note If the result is remapped and the node is not shutting down or failed,
3959  *       the operation will be recorded in the scheduler data's list of failed
3960  *       operations to highlight it for the user.
3961  *
3962  * \note This may update the resource's current and next role.
3963  */
3964 static void
3965 remap_operation(struct action_history *history,
3966                 enum action_fail_response *on_fail, bool expired)
3967 {
3968     bool is_probe = false;
3969     int orig_exit_status = history->exit_status;
3970     int orig_exec_status = history->execution_status;
3971     const char *why = NULL;
3972     const char *task = history->task;
3973 
3974     // Remap degraded results to their successful counterparts
3975     history->exit_status = pcmk__effective_rc(history->exit_status);
3976     if (history->exit_status != orig_exit_status) {
3977         why = "degraded result";
3978         if (!expired && (!history->node->details->shutdown
3979                          || history->node->details->online)) {
3980             record_failed_op(history);
3981         }
3982     }
3983 
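         /* Certain probe failures are masked (treated as equivalent to "not
          * running"), except for bundled resources
          */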
3984     if (!pcmk__is_bundled(history->rsc)
3985         && pcmk_xe_mask_probe_failure(history->xml)
3986         && ((history->execution_status != PCMK_EXEC_DONE)
3987             || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3988         history->execution_status = PCMK_EXEC_DONE;
3989         history->exit_status = PCMK_OCF_NOT_RUNNING;
3990         why = "equivalent probe result";
3991     }
3992 
3993     /* If the executor reported an execution status of anything but done or
3994      * error, consider that final. But for done or error, we know better whether
3995      * it should be treated as a failure or not, because we know the expected
3996      * result.
3997      */
3998     switch (history->execution_status) {
3999         case PCMK_EXEC_DONE:
4000         case PCMK_EXEC_ERROR:
4001             break;
4002 
4003         // These should be treated as node-fatal
4004         case PCMK_EXEC_NO_FENCE_DEVICE:
4005         case PCMK_EXEC_NO_SECRETS:
4006             remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4007                           "node-fatal error");
4008             goto remap_done;
4009 
4010         default:
4011             goto remap_done;
4012     }
4013 
4014     is_probe = pcmk_xe_is_probe(history->xml);
4015     if (is_probe) {
4016         task = "probe";
4017     }
4018 
4019     if (history->expected_exit_status < 0) {
4020         /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
4021          * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
4022          * expected exit status in the transition key, which (along with the
4023          * similar case of a corrupted transition key in the CIB) will be
4024          * reported to this function as -1. Pacemaker 2.0+ does not support
4025          * rolling upgrades from those versions or processing of saved CIB files
4026          * from those versions, so we do not need to care much about this case.
4027          */
4028         remap_because(history, &why, PCMK_EXEC_ERROR,
4029                       "obsolete history format");
4030         pcmk__config_warn("Expected result not found for %s on %s "
4031                           "(corrupt or obsolete CIB?)",
4032                           history->key, pcmk__node_name(history->node));
4033 
4034     } else if (history->exit_status == history->expected_exit_status) {
4035         remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
4036 
4037     } else {
4038         remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
4039         pcmk__rsc_debug(history->rsc,
4040                         "%s on %s: expected %d (%s), got %d (%s%s%s)",
4041                         history->key, pcmk__node_name(history->node),
4042                         history->expected_exit_status,
4043                         services_ocf_exitcode_str(history->expected_exit_status),
4044                         history->exit_status,
4045                         services_ocf_exitcode_str(history->exit_status),
4046                         (pcmk__str_empty(history->exit_reason)? "" : ": "),
4047                         pcmk__s(history->exit_reason, ""));
4048     }
4049 
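         /* Refine the execution status (and possibly the resource's role and
          * failure handling) based on the specific exit status
          */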
4050     switch (history->exit_status) {
4051         case PCMK_OCF_OK:
4052             if (is_probe
4053                 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
4054                 char *last_change_s = last_change_str(history->xml);
4055 
4056                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4057                 pcmk__rsc_info(history->rsc,
4058                                "Probe found %s active on %s at %s",
4059                                history->rsc->id, pcmk__node_name(history->node),
4060                                last_change_s);
4061                 free(last_change_s);
4062             }
4063             break;
4064 
4065         case PCMK_OCF_NOT_RUNNING:
4066             if (is_probe
4067                 || (history->expected_exit_status == history->exit_status)
4068                 || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
4069 
4070                 /* For probes, recurring monitors for the Stopped role, and
4071                  * unmanaged resources, "not running" is not considered a
4072                  * failure.
4073                  */
4074                 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4075                 history->rsc->role = pcmk_role_stopped;
4076                 *on_fail = pcmk_on_fail_ignore;
4077                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4078                                   "not running");
4079             }
4080             break;
4081 
4082         case PCMK_OCF_RUNNING_PROMOTED:
4083             if (is_probe
4084                 && (history->exit_status != history->expected_exit_status)) {
4085                 char *last_change_s = last_change_str(history->xml);
4086 
4087                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4088                 pcmk__rsc_info(history->rsc,
4089                                "Probe found %s active and promoted on %s at %s",
4090                                history->rsc->id,
4091                                pcmk__node_name(history->node), last_change_s);
4092                 free(last_change_s);
4093             }
4094             if (!expired
4095                 || (history->exit_status == history->expected_exit_status)) {
4096                 history->rsc->role = pcmk_role_promoted;
4097             }
4098             break;
4099 
4100         case PCMK_OCF_FAILED_PROMOTED:
4101             if (!expired) {
4102                 history->rsc->role = pcmk_role_promoted;
4103             }
4104             remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4105             break;
4106 
4107         case PCMK_OCF_NOT_CONFIGURED:
4108             remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4109             break;
4110 
4111         case PCMK_OCF_UNIMPLEMENT_FEATURE:
4112             {
4113                 guint interval_ms = 0;
4114                 crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
4115                                      &interval_ms);
4116 
4117                 if (interval_ms == 0) {
4118                     if (!expired) {
4119                         block_if_unrecoverable(history);
4120                     }
4121                     remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4122                                   "exit status");
4123                 } else {
4124                     remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4125                                   "exit status");
4126                 }
4127             }
4128             break;
4129 
4130         case PCMK_OCF_NOT_INSTALLED:
4131         case PCMK_OCF_INVALID_PARAM:
4132         case PCMK_OCF_INSUFFICIENT_PRIV:
4133             if (!expired) {
4134                 block_if_unrecoverable(history);
4135             }
4136             remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4137             break;
4138 
4139         default:
4140             if (history->execution_status == PCMK_EXEC_DONE) {
4141                 char *last_change_s = last_change_str(history->xml);
4142 
4143                 crm_info("Treating unknown exit status %d from %s of %s "
4144                          "on %s at %s as failure",
4145                          history->exit_status, task, history->rsc->id,
4146                          pcmk__node_name(history->node), last_change_s);
4147                 remap_because(history, &why, PCMK_EXEC_ERROR,
4148                               "unknown exit status");
4149                 free(last_change_s);
4150             }
4151             break;
4152     }
4153 
4154 remap_done:
4155     if (why != NULL) {
4156         pcmk__rsc_trace(history->rsc,
4157                         "Remapped %s result from [%s: %s] to [%s: %s] "
4158                         "because of %s",
4159                         history->key, pcmk_exec_status_str(orig_exec_status),
4160                         crm_exit_str(orig_exit_status),
4161                         pcmk_exec_status_str(history->execution_status),
4162                         crm_exit_str(history->exit_status), why);
4163     }
4164 }
4165 
4166 // Return TRUE if this is a start or monitor last failure but parameters have changed
4167 static bool
4168 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4169                               pcmk_resource_t *rsc, pcmk_node_t *node)
4170 {
4171     if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4172         if (pe__bundle_needs_remote_name(rsc)) {
4173             /* We haven't allocated resources yet, so we can't reliably
4174              * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4175              * When that's needed, defer the check until later.
4176              */
4177             pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4178                                 rsc->cluster);
4179 
4180         } else {
4181             pcmk__op_digest_t *digest_data = NULL;
4182 
4183             digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4184                                                 rsc->cluster);
4185             switch (digest_data->rc) {
4186                 case pcmk__digest_unknown:
4187                     crm_trace("Resource %s history entry %s on %s"
4188                               " has no digest to compare",
4189                               rsc->id, pcmk__xe_history_key(xml_op),
4190                               node->details->id);
4191                     break;
4192                 case pcmk__digest_match:
4193                     break;
4194                 default:
4195                     return TRUE;
4196             }
4197         }
4198     }
4199     return FALSE;
4200 }
4201 
4202 // Order action after fencing of remote node, given connection rsc
4203 static void
4204 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4205                            pcmk_scheduler_t *scheduler)
4206 {
4207     pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4208 
4209     if (remote_node) {
4210         pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4211                                            FALSE, scheduler);
4212 
4213         order_actions(fence, action, pcmk__ar_first_implies_then);
4214     }
4215 }
4216 
4217 static bool
4218 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4219                               guint interval_ms, bool is_last_failure)
4220 {
4221     /* Clearing failures of recurring monitors has special concerns. The
4222      * executor reports only changes in the monitor result, so if the
4223      * monitor is still active and still getting the same failure result,
4224      * that will go undetected after the failure is cleared.
4225      *
4226      * Also, the operation history will have the time when the recurring
4227      * monitor result changed to the given code, not the time when the
4228      * result last happened.
4229      *
4230      * @TODO We probably should clear such failures only when the failure
4231      * timeout has passed since the last occurrence of the failed result.
4232      * However we don't record that information. We could maybe approximate
4233      * that by clearing only if there is a more recent successful monitor or
4234      * stop result, but we don't even have that information at this point
4235      * since we are still unpacking the resource's operation history.
4236      *
4237      * This is especially important for remote connection resources with a
4238      * reconnect interval, so in that case, we skip clearing failures
4239      * if the remote node hasn't been fenced.
4240      */
4241     if (rsc->remote_reconnect_ms
4242         && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
4243         && (interval_ms != 0)
4244         && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4245 
4246         pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
4247 
4248         if (remote_node && !remote_node->details->remote_was_fenced) {
4249             if (is_last_failure) {
4250                 crm_info("Waiting to clear monitor failure for remote node %s"
4251                          " until fencing has occurred", rsc->id);
4252             }
4253             return TRUE;
4254         }
4255     }
4256     return FALSE;
4257 }
4258 
4259 /*!
4260  * \internal
4261  * \brief Check operation age and schedule failure clearing when appropriate
4262  *
4263  * This function has two distinct purposes. The first is to check whether an
4264  * operation history entry is expired (i.e. the resource has a failure timeout,
4265  * the entry is older than the timeout, and the resource either has no fail
4266  * count or its fail count is entirely older than the timeout). The second is
4267  * to schedule fail count clearing when appropriate: when the entry is expired
4268  * and either the resource has an expired fail count or the entry is a
4269  * last_failure for a remote connection resource with a reconnect interval, or
4270  * when the entry is a last_failure for a start or monitor operation and the
4271  * resource's parameters have changed since the operation.
4272  *
4273  * \param[in,out] history  Parsed action result history
4274  *
4275  * \return true if operation history entry is expired, otherwise false
4276  */
4277 static bool
4278 check_operation_expiry(struct action_history *history)
4279 {
4280     bool expired = false;
4281     bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4282     time_t last_run = 0;
4283     int unexpired_fail_count = 0;
4284     const char *clear_reason = NULL;
4285 
4286     if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4287         pcmk__rsc_trace(history->rsc,
4288                         "Resource history entry %s on %s is not expired: "
4289                         "Not Installed does not expire",
4290                         history->id, pcmk__node_name(history->node));
4291         return false; // "Not installed" must always be cleared manually
4292     }
4293 
4294     if ((history->rsc->failure_timeout > 0)
4295         && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
4296                                     &last_run) == 0)) {
4297 
4298         /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4299          * timestamp
4300          */
4301 
4302         time_t now = get_effective_time(history->rsc->cluster);
4303         time_t last_failure = 0;
4304 
4305         // Is this particular operation history older than the failure timeout?
4306         if ((now >= (last_run + history->rsc->failure_timeout))
4307             && !should_ignore_failure_timeout(history->rsc, history->task,
4308                                               history->interval_ms,
4309                                               is_last_failure)) {
4310             expired = true;
4311         }
4312 
4313         // Does the resource as a whole have an unexpired fail count?
4314         unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4315                                                 &last_failure,
4316                                                 pcmk__fc_effective,
4317                                                 history->xml);
4318 
4319         // Update scheduler recheck time according to *last* failure
4320         crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4321                   " last-failure@%lld",
4322                   history->id, (long long) last_run, (expired? "" : "not "),
4323                   (long long) now, unexpired_fail_count,
4324                   history->rsc->failure_timeout, (long long) last_failure);
4325         last_failure += history->rsc->failure_timeout + 1;
4326         if (unexpired_fail_count && (now < last_failure)) {
4327             pe__update_recheck_time(last_failure, history->rsc->cluster,
4328                                     "fail count expiration");
4329         }
4330     }
4331 
4332     if (expired) {
4333         if (pe_get_failcount(history->node, history->rsc, NULL,
4334                              pcmk__fc_default, history->xml)) {
4335             // There is a fail count ignoring timeout
4336 
4337             if (unexpired_fail_count == 0) {
4338                 // There is no fail count considering timeout
4339                 clear_reason = "it expired";
4340 
4341             } else {
4342                 /* This operation is old, but there is an unexpired fail count.
4343                  * In a properly functioning cluster, this should only be
4344                  * possible if this operation is not a failure (otherwise the
4345                  * fail count should be expired too), so this is really just a
4346                  * failsafe.
4347                  */
4348                 pcmk__rsc_trace(history->rsc,
4349                                 "Resource history entry %s on %s is not "
4350                                 "expired: Unexpired fail count",
4351                                 history->id, pcmk__node_name(history->node));
4352                 expired = false;
4353             }
4354 
4355         } else if (is_last_failure
4356                    && (history->rsc->remote_reconnect_ms != 0)) {
4357             /* Clear any expired last failure when reconnect interval is set,
4358              * even if there is no fail count.
4359              */
4360             clear_reason = "reconnect interval is set";
4361         }
4362     }
4363 
4364     if (!expired && is_last_failure
4365         && should_clear_for_param_change(history->xml, history->task,
4366                                          history->rsc, history->node)) {
4367         clear_reason = "resource parameters have changed";
4368     }
4369 
4370     if (clear_reason != NULL) {
4371         pcmk_action_t *clear_op = NULL;
4372 
4373         // Schedule clearing of the fail count
4374         clear_op = pe__clear_failcount(history->rsc, history->node,
4375                                        clear_reason, history->rsc->cluster);
4376 
4377         if (pcmk_is_set(history->rsc->cluster->flags,
4378                         pcmk_sched_fencing_enabled)
4379             && (history->rsc->remote_reconnect_ms != 0)) {
4380             /* If we're clearing a remote connection due to a reconnect
4381              * interval, we want to wait until any scheduled fencing
4382              * completes.
4383              *
4384              * We could limit this to remote_node->details->unclean, but at
4385              * this point, that's always true (it won't be reliable until
4386              * after unpack_node_history() is done).
4387              */
4388             crm_info("Clearing %s failure will wait until any scheduled "
4389                      "fencing of %s completes",
4390                      history->task, history->rsc->id);
4391             order_after_remote_fencing(clear_op, history->rsc,
4392                                        history->rsc->cluster);
4393         }
4394     }
4395 
4396     if (expired && (history->interval_ms == 0)
4397         && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4398         switch (history->exit_status) {
4399             case PCMK_OCF_OK:
4400             case PCMK_OCF_NOT_RUNNING:
4401             case PCMK_OCF_RUNNING_PROMOTED:
4402             case PCMK_OCF_DEGRADED:
4403             case PCMK_OCF_DEGRADED_PROMOTED:
4404                 // Don't expire probes that return these values
4405                 pcmk__rsc_trace(history->rsc,
4406                                 "Resource history entry %s on %s is not "
4407                                 "expired: Probe result",
4408                                 history->id, pcmk__node_name(history->node));
4409                 expired = false;
4410                 break;
4411         }
4412     }
4413 
4414     return expired;
4415 }
4416 
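     /*!
      * \internal
      * \brief Get the expected result of an action from its history entry XML
      *
      * \param[in] xml_op  Action history entry XML
      *
      * \return Expected exit status parsed from the entry's transition key, or
      *         -1 if the entry has no (or a corrupt) transition key
      */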
4417 int
4418 pe__target_rc_from_xml(const xmlNode *xml_op)
4419 {
4420     int target_rc = 0;
4421     const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4422 
4423     if (key == NULL) {
4424         return -1;
4425     }
4426     decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4427     return target_rc;
4428 }
4429 
4430 /*!
4431  * \internal
4432  * \brief Update a resource's state for an action result
4433  *
4434  * \param[in,out] history       Parsed action history entry
4435  * \param[in]     exit_status   Exit status to base new state on
4436  * \param[in]     last_failure  Resource's last_failure entry, if known
4437  * \param[in,out] on_fail       Resource's current failure handling
4438  */
4439 static void
4440 update_resource_state(struct action_history *history, int exit_status,
4441                       const xmlNode *last_failure,
4442                       enum action_fail_response *on_fail)
4443 {
4444     bool clear_past_failure = false;
4445 
4446     if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4447         || (!pcmk__is_bundled(history->rsc)
4448             && pcmk_xe_mask_probe_failure(history->xml))) {
4449         history->rsc->role = pcmk_role_stopped;
4450 
4451     } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4452         clear_past_failure = true;
4453 
4454     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4455                             pcmk__str_none)) {
4456         if ((last_failure != NULL)
4457             && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4458                             pcmk__str_none)) {
4459             clear_past_failure = true;
4460         }
4461         if (history->rsc->role < pcmk_role_started) {
4462             set_active(history->rsc);
4463         }
4464 
4465     } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4466         history->rsc->role = pcmk_role_started;
4467         clear_past_failure = true;
4468 
4469     } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4470         history->rsc->role = pcmk_role_stopped;
4471         clear_past_failure = true;
4472 
4473     } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4474                             pcmk__str_none)) {
4475         history->rsc->role = pcmk_role_promoted;
4476         clear_past_failure = true;
4477 
4478     } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4479                             pcmk__str_none)) {
4480         if (*on_fail == pcmk_on_fail_demote) {
4481             /* Demote clears an error only if
4482              * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4483              */
4484             clear_past_failure = true;
4485         }
4486         history->rsc->role = pcmk_role_unpromoted;
4487 
4488     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4489                             pcmk__str_none)) {
4490         history->rsc->role = pcmk_role_started;
4491         clear_past_failure = true;
4492 
4493     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4494                             pcmk__str_none)) {
4495         unpack_migrate_to_success(history);
4496 
4497     } else if (history->rsc->role < pcmk_role_started) {
4498         pcmk__rsc_trace(history->rsc, "%s active on %s",
4499                         history->rsc->id, pcmk__node_name(history->node));
4500         set_active(history->rsc);
4501     }
4502 
4503     if (!clear_past_failure) {
4504         return;
4505     }
4506 
4507     switch (*on_fail) {
4508         case pcmk_on_fail_stop:
4509         case pcmk_on_fail_ban:
4510         case pcmk_on_fail_standby_node:
4511         case pcmk_on_fail_fence_node:
4512             pcmk__rsc_trace(history->rsc,
4513                             "%s (%s) is not cleared by a completed %s",
4514                             history->rsc->id, pcmk_on_fail_text(*on_fail),
4515                             history->task);
4516             break;
4517 
4518         case pcmk_on_fail_block:
4519         case pcmk_on_fail_ignore:
4520         case pcmk_on_fail_demote:
4521         case pcmk_on_fail_restart:
4522         case pcmk_on_fail_restart_container:
4523             *on_fail = pcmk_on_fail_ignore;
4524             pe__set_next_role(history->rsc, pcmk_role_unknown,
4525                               "clear past failures");
4526             break;
4527 
4528         case pcmk_on_fail_reset_remote:
4529             if (history->rsc->remote_reconnect_ms == 0) {
4530                 /* With no reconnect interval, the connection is allowed to
4531                  * start again after the remote node is fenced and
4532                  * completely stopped. (With a reconnect interval, we wait
4533                  * for the failure to be cleared entirely before attempting
4534                  * to reconnect.)
4535                  */
4536                 *on_fail = pcmk_on_fail_ignore;
4537                 pe__set_next_role(history->rsc, pcmk_role_unknown,
4538                                   "clear past failures and reset remote");
4539             }
4540             break;
4541     }
4542 }
4543 
4544 /*!
4545  * \internal
4546  * \brief Check whether a given history entry matters for resource state
4547  *
4548  * \param[in] history  Parsed action history entry
4549  *
4550  * \return true if action can affect resource state, otherwise false
4551  */
4552 static inline bool
4553 can_affect_state(struct action_history *history)
4554 {
4555 #if 0
4556     /* @COMPAT It might be better to parse only actions we know we're interested
4557      * in, rather than exclude a couple we don't. However that would be a
4558      * behavioral change that should be done at a major or minor series release.
4559      * Currently, unknown operations can affect whether a resource is considered
4560      * active and/or failed.
4561      */
4562      return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4563                              PCMK_ACTION_START, PCMK_ACTION_STOP,
4564                              PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4565                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4566                              "asyncmon", NULL);
4567 #else
4568      return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4569                               PCMK_ACTION_META_DATA, NULL);
4570 #endif
4571 }
4572 
4573 /*!
4574  * \internal
4575  * \brief Unpack execution/exit status and exit reason from a history entry
4576  *
4577  * \param[in,out] history  Action history entry to unpack
4578  *
4579  * \return Standard Pacemaker return code
4580  */
4581 static int
4582 unpack_action_result(struct action_history *history)
4583 {
4584     if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4585                                &(history->execution_status)) < 0)
4586         || (history->execution_status < PCMK_EXEC_PENDING)
4587         || (history->execution_status > PCMK_EXEC_MAX)
4588         || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4589         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4590                          "with invalid " PCMK__XA_OP_STATUS " '%s'",
4591                          history->id, history->rsc->id,
4592                          pcmk__node_name(history->node),
4593                          pcmk__s(crm_element_value(history->xml,
4594                                                    PCMK__XA_OP_STATUS),
4595                                  ""));
4596         return pcmk_rc_unpack_error;
4597     }
4598     if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4599                                &(history->exit_status)) < 0)
4600         || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4601 #if 0
4602         /* @COMPAT We should ignore malformed entries, but since that would
4603          * change behavior, it should be done at a major or minor series
4604          * release.
4605          */
4606         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4607                          "with invalid " PCMK__XA_RC_CODE " '%s'",
4608                          history->id, history->rsc->id,
4609                          pcmk__node_name(history->node),
4610                          pcmk__s(crm_element_value(history->xml,
4611                                                    PCMK__XA_RC_CODE),
4612                                  ""));
4613         return pcmk_rc_unpack_error;
4614 #else
4615         history->exit_status = CRM_EX_ERROR;
4616 #endif
4617     }
4618     history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4619     return pcmk_rc_ok;
4620 }
4621 
4622 /*!
4623  * \internal
4624  * \brief Process an action history entry whose result expired
4625  *
4626  * \param[in,out] history           Parsed action history entry
4627  * \param[in]     orig_exit_status  Action exit status before remapping
4628  *
4629  * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
4630  *         entry needs no further processing)
4631  */
4632 static int
4633 process_expired_result(struct action_history *history, int orig_exit_status)
4634 {
4635     if (!pcmk__is_bundled(history->rsc)
4636         && pcmk_xe_mask_probe_failure(history->xml)
4637         && (orig_exit_status != history->expected_exit_status)) {
4638 
4639         if (history->rsc->role <= pcmk_role_stopped) {
4640             history->rsc->role = pcmk_role_unknown;
4641         }
4642         crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4643                   "Masked failure expired",
4644                   history->id, history->rsc->id,
4645                   pcmk__node_name(history->node));
4646         return pcmk_rc_ok;
4647     }
4648 
4649     if (history->exit_status == history->expected_exit_status) {
4650         return pcmk_rc_undetermined; // Only failures expire
4651     }
4652 
4653     if (history->interval_ms == 0) {
4654         crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4655                    "Expired failure",
4656                    history->id, history->task, history->rsc->id,
4657                    pcmk__node_name(history->node));
4658         return pcmk_rc_ok;
4659     }
4660 
4661     if (history->node->details->online && !history->node->details->unclean) {
4662         /* Reschedule the recurring action. schedule_cancel() won't work at
4663          * this stage, so as a hacky workaround, forcibly change the restart
4664          * digest so pcmk__check_action_config() does what we want later.
4665          *
4666          * @TODO We should skip this if there is a newer successful monitor.
4667          *       Also, this causes rescheduling only if the history entry
4668          *       has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4669          *       scheduler regression test doesn't, but that may not be a
4670          *       realistic scenario in production).
4671          */
4672         crm_notice("Rescheduling %s-interval %s of %s on %s "
4673                    "after failure expired",
4674                    pcmk__readable_interval(history->interval_ms), history->task,
4675                    history->rsc->id, pcmk__node_name(history->node));
4676         crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4677                     "calculated-failure-timeout");
4678         return pcmk_rc_ok;
4679     }
4680 
4681     return pcmk_rc_undetermined;
4682 }
4683 
4684 /*!
4685  * \internal
4686  * \brief Process a masked probe failure
4687  *
4688  * \param[in,out] history           Parsed action history entry
4689  * \param[in]     orig_exit_status  Action exit status before remapping
4690  * \param[in]     last_failure      Resource's last_failure entry, if known
4691  * \param[in,out] on_fail           Resource's current failure handling
4692  */
4693 static void
4694 mask_probe_failure(struct action_history *history, int orig_exit_status,
4695                    const xmlNode *last_failure,
4696                    enum action_fail_response *on_fail)
4697 {
4698     pcmk_resource_t *ban_rsc = history->rsc;
4699 
4700     if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4701         ban_rsc = uber_parent(history->rsc);
4702     }
4703 
4704     crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4705                services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4706                pcmk__node_name(history->node));
4707     update_resource_state(history, history->expected_exit_status, last_failure,
4708                           on_fail);
4709     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
4710 
4711     record_failed_op(history);
4712     resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4713                       "masked-probe-failure", history->rsc->cluster);
4714 }
4715 
4716 /*!
4717  * \internal
      * \brief Check whether a given failure is for a given pending action
4718  *
4719  * \param[in] history       Parsed history entry for pending action
4720  * \param[in] last_failure  Resource's last_failure entry, if known
4721  *
4722  * \return true if \p last_failure is failure of pending action in \p history,
4723  *         otherwise false
4724  * \note Both \p history and \p last_failure must come from the same
4725  *       \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
4726  *       the same.
4727  */
4728 static bool
4729 failure_is_newer(const struct action_history *history,
4730                  const xmlNode *last_failure)
4731 {
4732     guint failure_interval_ms = 0U;
4733     long long failure_change = 0LL;
4734     long long this_change = 0LL;
4735 
4736     if (last_failure == NULL) {
4737         return false; // Resource has no last_failure entry
4738     }
4739 
4740     if (!pcmk__str_eq(history->task,
4741                       crm_element_value(last_failure, PCMK_XA_OPERATION),
4742                       pcmk__str_none)) {
4743         return false; // last_failure is for different action
4744     }
4745 
4746     if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4747                               &failure_interval_ms) != pcmk_ok)
4748         || (history->interval_ms != failure_interval_ms)) {
4749         return false; // last_failure is for action with different interval
4750     }
4751 
4752     if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
4753                        &this_change, 0LL) != pcmk_rc_ok)
4754         || (pcmk__scan_ll(crm_element_value(last_failure,
4755                                             PCMK_XA_LAST_RC_CHANGE),
4756                           &failure_change, 0LL) != pcmk_rc_ok)
4757         || (failure_change < this_change)) {
4758         return false; // Failure is not known to be newer
4759     }
4760 
4761     return true;
4762 }
4763 
4764 /*!
4765  * \internal
4766  * \brief Update a resource's role etc. for a pending action
4767  *
4768  * \param[in,out] history       Parsed history entry for pending action
4769  * \param[in]     last_failure  Resource's last_failure entry, if known
4770  */
4771 static void
4772 process_pending_action(struct action_history *history,
4773                        const xmlNode *last_failure)
4774 {
4775     /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4776      * and there might be a RSC_monitor_INTERVAL entry with the last successful
4777      * or pending result.
4778      *
4779      * If last_failure contains the failure of the pending recurring monitor
4780      * we're processing here, and is newer, the action is no longer pending.
4781      * (Pending results have call ID -1, which sorts last, so the last failure
4782      * if any should be known.)
4783      */
4784     if (failure_is_newer(history, last_failure)) {
4785         return;
4786     }
4787 
4788     if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4789         pcmk__set_rsc_flags(history->rsc, pcmk_rsc_start_pending);
4790         set_active(history->rsc);
4791 
4792     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4793         history->rsc->role = pcmk_role_promoted;
4794 
4795     } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4796                && history->node->details->unclean) {
4797         /* A migrate_to action is pending on an unclean source, so force a stop
4798          * on the target.
4799          */
4800         const char *migrate_target = NULL;
4801         pcmk_node_t *target = NULL;
4802 
4803         migrate_target = crm_element_value(history->xml,
4804                                            PCMK__META_MIGRATE_TARGET);
4805         target = pcmk_find_node(history->rsc->cluster, migrate_target);
4806         if (target != NULL) {
4807             stop_action(history->rsc, target, FALSE);
4808         }
4809     }
4810 
4811     if (history->rsc->pending_task != NULL) {
4812         /* There should never be multiple pending actions, but as a failsafe,
4813          * just remember the first one processed for display purposes.
4814          */
4815         return;
4816     }
4817 
4818     if (pcmk_is_probe(history->task, history->interval_ms)) {
4819         /* Pending probes are currently never displayed, even if pending
4820          * operations are requested. If we ever want to change that,
4821          * enable the below and the corresponding part of
4822          * native.c:native_pending_task().
4823          */
4824 #if 0
4825         history->rsc->pending_task = strdup("probe");
4826         history->rsc->pending_node = history->node;
4827 #endif
4828     } else {
4829         history->rsc->pending_task = strdup(history->task);
4830         history->rsc->pending_node = history->node;
4831     }
4832 }
4833 
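     /*!
      * \internal
      * \brief Unpack an action history entry and update resource state accordingly
      *
      * \param[in,out] rsc           Resource that the history entry is for
      * \param[in,out] node          Node that the history entry is for
      * \param[in,out] xml_op        History entry XML to unpack
      * \param[in,out] last_failure  Where to track the resource's last failure
      *                              entry across this resource's history
      * \param[in,out] on_fail       Resource's current failure handling
      */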
4834 static void
4835 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4836               xmlNode **last_failure, enum action_fail_response *on_fail)
4837 {
4838     int old_rc = 0;
4839     bool expired = false;
4840     pcmk_resource_t *parent = rsc;
4841     enum rsc_role_e fail_role = pcmk_role_unknown;
4842     enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4843 
4844     struct action_history history = {
4845         .rsc = rsc,
4846         .node = node,
4847         .xml = xml_op,
4848         .execution_status = PCMK_EXEC_UNKNOWN,
4849     };
4850 
4851     CRM_CHECK(rsc && node && xml_op, return);
4852 
4853     history.id = pcmk__xe_id(xml_op);
4854     if (history.id == NULL) {
4855         pcmk__config_err("Ignoring resource history entry for %s on %s "
4856                          "without ID", rsc->id, pcmk__node_name(node));
4857         return;
4858     }
4859 
4860     // Task and interval
4861     history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4862     if (history.task == NULL) {
4863         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4864                          "without " PCMK_XA_OPERATION,
4865                          history.id, rsc->id, pcmk__node_name(node));
4866         return;
4867     }
4868     crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4869     if (!can_affect_state(&history)) {
4870         pcmk__rsc_trace(rsc,
4871                         "Ignoring resource history entry %s for %s on %s "
4872                         "with irrelevant action '%s'",
4873                         history.id, rsc->id, pcmk__node_name(node),
4874                         history.task);
4875         return;
4876     }
4877 
4878     if (unpack_action_result(&history) != pcmk_rc_ok) {
4879         return; // Error already logged
4880     }
4881 
4882     history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4883     history.key = pcmk__xe_history_key(xml_op);
4884     crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4885 
4886     pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4887                     history.id, history.task, history.call_id,
4888                     pcmk__node_name(node),
4889                     pcmk_exec_status_str(history.execution_status),
4890                     crm_exit_str(history.exit_status));
4891 
4892     if (node->details->unclean) {
4893         pcmk__rsc_trace(rsc,
4894                         "%s is running on %s, which is unclean (further action "
4895                         "depends on value of stop's on-fail attribute)",
4896                         rsc->id, pcmk__node_name(node));
4897     }
4898 
4899     expired = check_operation_expiry(&history);
4900     old_rc = history.exit_status;
4901 
4902     remap_operation(&history, on_fail, expired);
4903 
4904     if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4905         goto done;
4906     }
4907 
4908     if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4909         mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4910         goto done;
4911     }
4912 
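         /* For anonymous clone instances, the bans for hard and fatal errors
          * below apply to the whole collective resource
          */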
4913     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4914         parent = uber_parent(rsc);
4915     }
4916 
4917     switch (history.execution_status) {
4918         case PCMK_EXEC_PENDING:
4919             process_pending_action(&history, *last_failure);
4920             goto done;
4921 
4922         case PCMK_EXEC_DONE:
4923             update_resource_state(&history, history.exit_status, *last_failure,
4924                                   on_fail);
4925             goto done;
4926 
4927         case PCMK_EXEC_NOT_INSTALLED:
4928             unpack_failure_handling(&history, &failure_strategy, &fail_role);
4929             if (failure_strategy == pcmk_on_fail_ignore) {
4930                 crm_warn("Cannot ignore failed %s of %s on %s: "
4931                          "Resource agent doesn't exist "
4932                          CRM_XS " status=%d rc=%d id=%s",
4933                          history.task, rsc->id, pcmk__node_name(node),
4934                          history.execution_status, history.exit_status,
4935                          history.id);
4936                 /* Also for printing it as "FAILED" by marking it as
4937                  * pcmk_rsc_failed later
4938                  */
4939                 *on_fail = pcmk_on_fail_ban;
4940             }
4941             resource_location(parent, node, -PCMK_SCORE_INFINITY,
4942                               "hard-error", rsc->cluster);
4943             unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4944                                   last_failure, on_fail);
4945             goto done;
4946 
4947         case PCMK_EXEC_NOT_CONNECTED:
4948             if (pcmk__is_pacemaker_remote_node(node)
4949                 && pcmk_is_set(node->details->remote_rsc->flags,
4950                                pcmk_rsc_managed)) {
4951                 /* We should never get into a situation where a managed remote
4952                  * connection resource is considered OK but a resource action
4953                  * behind the connection gets a "not connected" status. But as a
4954                  * fail-safe in case a bug or unusual circumstances do lead to
4955                  * that, ensure the remote connection is considered failed.
4956                  */
4957                 pcmk__set_rsc_flags(node->details->remote_rsc,
4958                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
4959             }
4960             break; // Not done, do error handling
4961 
4962         case PCMK_EXEC_ERROR:
4963         case PCMK_EXEC_ERROR_HARD:
4964         case PCMK_EXEC_ERROR_FATAL:
4965         case PCMK_EXEC_TIMEOUT:
4966         case PCMK_EXEC_NOT_SUPPORTED:
4967         case PCMK_EXEC_INVALID:
4968             break; // Not done, do error handling
4969 
4970         default: // No other value should be possible at this point
4971             break;
4972     }
4973 
4974     unpack_failure_handling(&history, &failure_strategy, &fail_role);
4975     if ((failure_strategy == pcmk_on_fail_ignore)
4976         || ((failure_strategy == pcmk_on_fail_restart_container)
4977             && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4978 
4979         char *last_change_s = last_change_str(xml_op);
4980 
4981         crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4982                  CRM_XS " %s",
4983                  history.task, services_ocf_exitcode_str(history.exit_status),
4984                  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4985                  pcmk__s(history.exit_reason, ""), rsc->id,
4986                  pcmk__node_name(node), last_change_s, history.id);
4987         free(last_change_s);
4988 
4989         update_resource_state(&history, history.expected_exit_status,
4990                               *last_failure, on_fail);
4991         crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
4992         pcmk__set_rsc_flags(rsc, pcmk_rsc_ignore_failure);
4993 
4994         record_failed_op(&history);
4995 
4996         if ((failure_strategy == pcmk_on_fail_restart_container)
4997             && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4998             *on_fail = failure_strategy;
4999         }
5000 
5001     } else {
5002         unpack_rsc_op_failure(&history, failure_strategy, fail_role,
5003                               last_failure, on_fail);
5004 
5005         if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
5006             uint8_t log_level = LOG_ERR;
5007 
5008             if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
5009                 log_level = LOG_NOTICE;
5010             }
5011             do_crm_log(log_level,
5012                        "Preventing %s from restarting on %s because "
5013                        "of hard failure (%s%s%s) " CRM_XS " %s",
5014                        parent->id, pcmk__node_name(node),
5015                        services_ocf_exitcode_str(history.exit_status),
5016                        (pcmk__str_empty(history.exit_reason)? "" : ": "),
5017                        pcmk__s(history.exit_reason, ""), history.id);
5018             resource_location(parent, node, -PCMK_SCORE_INFINITY,
5019                               "hard-error", rsc->cluster);
5020 
5021         } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
5022             pcmk__sched_err("Preventing %s from restarting anywhere because "
5023                             "of fatal failure (%s%s%s) " CRM_XS " %s",
5024                             parent->id,
5025                             services_ocf_exitcode_str(history.exit_status),
5026                             (pcmk__str_empty(history.exit_reason)? "" : ": "),
5027                             pcmk__s(history.exit_reason, ""), history.id);
5028             resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
5029                               "fatal-error", rsc->cluster);
5030         }
5031     }
5032 
5033 done:
5034     pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
5035                     rsc->id, pcmk__node_name(node), history.id,
5036                     pcmk_role_text(rsc->role),
5037                     pcmk_role_text(rsc->next_role));
5038 }
5039 
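     /*!
      * \internal
      * \brief Add built-in and configured attributes (and utilization) to a node
      *
      * \param[in]     xml_obj    Node's XML (source of instance attributes and
      *                           utilization)
      * \param[in,out] node       Node to add attributes to
      * \param[in]     overwrite  Whether unpacked instance attributes may
      *                           overwrite existing values
      * \param[in,out] scheduler  Scheduler data
      */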
5040 static void
5041 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
5042                pcmk_scheduler_t *scheduler)
5043 {
5044     const char *cluster_name = NULL;
5045 
5046     pe_rule_eval_data_t rule_data = {
5047         .node_hash = NULL,
5048         .now = scheduler->now,
5049         .match_data = NULL,
5050         .rsc_data = NULL,
5051         .op_data = NULL
5052     };
5053 
5054     pcmk__insert_dup(node->details->attrs,
5055                      CRM_ATTR_UNAME, node->details->uname);
5056 
5057     pcmk__insert_dup(node->details->attrs, CRM_ATTR_ID, node->details->id);
5058     if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
5059         scheduler->dc_node = node;
5060         node->details->is_dc = TRUE;
5061         pcmk__insert_dup(node->details->attrs,
5062                          CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
5063     } else {
5064         pcmk__insert_dup(node->details->attrs,
5065                          CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
5066     }
5067 
5068     cluster_name = g_hash_table_lookup(scheduler->config_hash,
5069                                        PCMK_OPT_CLUSTER_NAME);
5070     if (cluster_name) {
5071         pcmk__insert_dup(node->details->attrs, CRM_ATTR_CLUSTER_NAME,
5072                          cluster_name);
5073     }
5074 
5075     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data,
5076                                node->details->attrs, NULL, overwrite,
5077                                scheduler);
5078 
5079     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
5080                                node->details->utilization, NULL,
5081                                FALSE, scheduler);
5082 
5083     if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5084                         pcmk__rsc_node_current) == NULL) {
5085         const char *site_name = pcmk__node_attr(node, "site-name", NULL,
5086                                                 pcmk__rsc_node_current);
5087 
5088         if (site_name) {
5089             pcmk__insert_dup(node->details->attrs,
5090                              CRM_ATTR_SITE_NAME, site_name);
5091 
5092         } else if (cluster_name) {
5093             /* Default to cluster-name if unset */
5094             pcmk__insert_dup(node->details->attrs,
5095                              CRM_ATTR_SITE_NAME, cluster_name);
5096         }
5097     }
5098 }
5099 
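     /*!
      * \internal
      * \brief Extract a resource's operation history entries from its XML
      *
      * Each returned entry is tagged with the given resource and node names.
      * When \p active_filter is true, entries older than the most recent
      * start (as determined by calculate_active_ops()) are dropped, and
      * nothing is returned if the resource was most recently stopped.
      *
      * \param[in]     node           Name of node that history is for
      * \param[in]     rsc            ID of resource that history is for
      * \param[in,out] rsc_entry      Resource history XML (the parent of its
      *                               lrm_rsc_op entries)
      * \param[in]     active_filter  Whether to return only active operations
      *
      * \return Operation history entries sorted by call ID, or NULL if none
      */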
5100 static GList *
5101 extract_operations(const char *node, const char *rsc, xmlNode *rsc_entry, gboolean active_filter)
5102 {
5103     int counter = -1;
5104     int stop_index = -1;
5105     int start_index = -1;
5106 
5107     xmlNode *rsc_op = NULL;
5108 
5109     GList *gIter = NULL;
5110     GList *op_list = NULL;
5111     GList *sorted_op_list = NULL;
5112 
5113     /* Collect this resource's lrm_rsc_op history entries, tagging each
5114      * with its resource and node names
5115      */
5116 
5117     for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
5118          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5119 
5120         if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5121             crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5122             crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5123             op_list = g_list_prepend(op_list, rsc_op);
5124         }
5125     }
5126 
5127     if (op_list == NULL) {
5128         /* if there are no operations, there is nothing to do */
5129         return NULL;
5130     }
5131 
5132     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5133 
5134     /* If not filtering for active operations, return everything */
5135     if (active_filter == FALSE) {
5136         return sorted_op_list;
5137     }
5138 
5139     op_list = NULL;
5140 
5141     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5142 
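         /* Keep only entries at or after the most recent start; if that start
          * is followed by a stop, the resource is inactive, so keep nothing
          */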
5143     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5144         xmlNode *rsc_op = (xmlNode *) gIter->data;
5145 
5146         counter++;
5147 
5148         if (start_index < stop_index) {
5149             crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5150             break;
5151 
5152         } else if (counter < start_index) {
5153             crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5154             continue;
5155         }
5156         op_list = g_list_append(op_list, rsc_op);
5157     }
5158 
5159     g_list_free(sorted_op_list);
5160     return op_list;
5161 }
5162 
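     /*!
      * \brief Find resource operation history entries for given node(s) and
      *        resource(s)
      *
      * History is unpacked only for nodes that are online, unless fencing is
      * enabled, in which case offline nodes' history is included as well.
      *
      * \param[in]     rsc            ID of resource to match, or NULL for all
      * \param[in]     node           Name of node to match, or NULL for all
      * \param[in]     active_filter  Whether to return only active operations
      * \param[in,out] scheduler      Scheduler data
      *
      * \return List of matching operation history XML
      */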
5163 GList *
5164 find_operations(const char *rsc, const char *node, gboolean active_filter,
5165                 pcmk_scheduler_t *scheduler)
5166 {
5167     GList *output = NULL;
5168     GList *intermediate = NULL;
5169 
5170     xmlNode *tmp = NULL;
5171     xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5172                                            NULL, NULL);
5173 
5174     pcmk_node_t *this_node = NULL;
5175 
5176     xmlNode *node_state = NULL;
5177 
5178     CRM_CHECK(status != NULL, return NULL);
5179 
5180     for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
5181          node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5182 
5183         if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
5184             const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5185 
5186             if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5187                 continue;
5188             }
5189 
5190             this_node = pcmk_find_node(scheduler, uname);
5191             if (this_node == NULL) {
5192                 CRM_LOG_ASSERT(this_node != NULL);
5193                 continue;
5194 
5195             } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5196                 determine_remote_online_status(scheduler, this_node);
5197 
5198             } else {
5199                 determine_online_status(node_state, this_node, scheduler);
5200             }
5201 
5202             if (this_node->details->online
5203                 || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
5204                 /* Offline nodes run no resources, so their history can
5205                  * normally be skipped; when fencing is enabled, unpack it
5206                  * anyway, so resource starts can be ordered after fencing
5207                  */
5208                 xmlNode *lrm_rsc = NULL;
5209 
5210                 tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5211                                            NULL);
5212                 tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
5213                                            NULL);
5214 
5215                 for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
5216                      lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5217 
5218                     if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5219                         const char *rsc_id = crm_element_value(lrm_rsc,
5220                                                                PCMK_XA_ID);
5221 
5222                         if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5223                             continue;
5224                         }
5225 
5226                         intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5227                         output = g_list_concat(output, intermediate);
5228                     }
5229                 }
5230             }
5231         }
5232     }
5233 
5234     return output;
5235 }
