pacemaker  2.1.8-3980678f03
Scalable High-Availability cluster resource manager
unpack.c
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 #include <time.h>
16 
17 #include <crm/crm.h>
18 #include <crm/services.h>
19 #include <crm/common/xml.h>
20 #include <crm/common/xml_internal.h>
21 
22 #include <crm/common/util.h>
23 #include <crm/pengine/rules.h>
24 #include <crm/pengine/internal.h>
25 #include <pe_status_private.h>
26 
27 CRM_TRACE_INIT_DATA(pe_status);
28 
29 // A (parsed) resource action history entry
30 struct action_history {
31  pcmk_resource_t *rsc; // Resource that history is for
32  pcmk_node_t *node; // Node that history is for
33  xmlNode *xml; // History entry XML
34 
35  // Parsed from entry XML
36  const char *id; // XML ID of history entry
37  const char *key; // Operation key of action
38  const char *task; // Action name
39  const char *exit_reason; // Exit reason given for result
40  guint interval_ms; // Action interval
41  int call_id; // Call ID of action
42  int expected_exit_status; // Expected exit status of action
43  int exit_status; // Actual exit status of action
44  int execution_status; // Execution status of action
45 };
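/* Illustrative only (not part of this file): the XML parsed into this struct
 * is a resource operation history entry from the CIB status section, along
 * the lines of (attribute names from the CIB schema; resource and key names
 * here are hypothetical, and the exact attribute set varies by version):
 *
 *   <lrm_rsc_op id="myrsc_last_0" operation_key="myrsc_start_0"
 *               operation="start" call-id="5" rc-code="0" op-status="0"
 *               interval="0" exit-reason=""/>
 */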
46 
47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
48  * using pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
49  * flag is stringified more readably in log messages.
50  */
51 #define set_config_flag(scheduler, option, flag) do { \
52  GHashTable *config_hash = (scheduler)->config_hash; \
53  const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
54  \
55  if (scf_value != NULL) { \
56  if (crm_is_true(scf_value)) { \
57  (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
58  LOG_TRACE, "Scheduler", \
59  crm_system_name, (scheduler)->flags, \
60  (flag), #flag); \
61  } else { \
62  (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
63  LOG_TRACE, "Scheduler", \
64  crm_system_name, (scheduler)->flags, \
65  (flag), #flag); \
66  } \
67  } \
68  } while(0)
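/* For example, unpack_config() below maps cluster options onto scheduler
 * flags with calls such as:
 *
 *   set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 *                   pcmk_sched_in_maintenance);
 */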
69 
70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71  xmlNode *xml_op, xmlNode **last_failure,
72  enum action_fail_response *failed);
73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74  pcmk_node_t *this_node);
75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76  bool overwrite, pcmk_scheduler_t *scheduler);
77 static void determine_online_status(const xmlNode *node_state,
78                                     pcmk_node_t *this_node,
79                                     pcmk_scheduler_t *scheduler);
80 
81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
82                             pcmk_scheduler_t *scheduler);
83 
84 
85 static gboolean
86 is_dangling_guest_node(pcmk_node_t *node)
87 {
88  /* we are looking for a remote-node that was supposed to be mapped to a
89  * container resource, but all traces of that container have disappeared
90  * from both the config and the status section. */
91  if (pcmk__is_pacemaker_remote_node(node)
92      && (node->details->remote_rsc != NULL)
93      && (node->details->remote_rsc->container == NULL)
94      && pcmk_is_set(node->details->remote_rsc->flags,
95                     pcmk_rsc_removed_filler)) {
96      return TRUE;
98 
99  return FALSE;
100 }
101 
111 void
112 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
113               const char *reason, bool priority_delay)
114 {
115  CRM_CHECK(node, return);
116 
117  /* A guest node is fenced by marking its container as failed */
118  if (pcmk__is_guest_or_bundle_node(node)) {
119      pcmk_resource_t *rsc = node->details->remote_rsc->container;
120 
121  if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
122  if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
123  crm_notice("Not fencing guest node %s "
124  "(otherwise would because %s): "
125  "its guest resource %s is unmanaged",
126  pcmk__node_name(node), reason, rsc->id);
127  } else {
128  pcmk__sched_warn("Guest node %s will be fenced "
129  "(by recovering its guest resource %s): %s",
130  pcmk__node_name(node), rsc->id, reason);
131 
132  /* We don't mark the node as unclean because that would prevent the
133  * node from running resources. We want to allow it to run resources
134  * in this transition if the recovery succeeds.
135  */
136              node->details->remote_requires_reset = TRUE;
137              pcmk__set_rsc_flags(rsc,
138                                  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
139          }
140  }
141 
142  } else if (is_dangling_guest_node(node)) {
143  crm_info("Cleaning up dangling connection for guest node %s: "
144  "fencing was already done because %s, "
145  "and guest resource no longer exists",
146              pcmk__node_name(node), reason);
147      pcmk__set_rsc_flags(node->details->remote_rsc,
148                          pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
149 
150  } else if (pcmk__is_remote_node(node)) {
151  pcmk_resource_t *rsc = node->details->remote_rsc;
152 
153  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
154  crm_notice("Not fencing remote node %s "
155  "(otherwise would because %s): connection is unmanaged",
156  pcmk__node_name(node), reason);
157  } else if(node->details->remote_requires_reset == FALSE) {
158  node->details->remote_requires_reset = TRUE;
159  pcmk__sched_warn("Remote node %s %s: %s",
160  pcmk__node_name(node),
161  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
162  reason);
163  }
164  node->details->unclean = TRUE;
165  // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
166  pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
167 
168  } else if (node->details->unclean) {
169  crm_trace("Cluster node %s %s because %s",
170  pcmk__node_name(node),
171  pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
172  reason);
173 
174  } else {
175  pcmk__sched_warn("Cluster node %s %s: %s",
176  pcmk__node_name(node),
177  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
178  reason);
179  node->details->unclean = TRUE;
180  pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
181  }
182 }
183 
184 // @TODO xpaths can't handle templates, rules, or id-refs
185 
186 // nvpair with provides or requires set to unfencing
187 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
188  "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \
189  "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
190  "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
191 
192 // unfencing in rsc_defaults or any resource
193 #define XPATH_ENABLE_UNFENCING \
194  "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
195  "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
196  "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
197  "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
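/* As an illustration, this XPath matches an nvpair such as the one in the
 * following abridged resource configuration (resource names hypothetical):
 *
 *   <primitive id="myrsc" class="ocf" provider="pacemaker" type="Dummy">
 *     <meta_attributes id="myrsc-meta_attributes">
 *       <nvpair id="myrsc-requires" name="requires" value="unfencing"/>
 *     </meta_attributes>
 *   </primitive>
 */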
198 
199 static void
200 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
201 {
202  xmlXPathObjectPtr result = NULL;
203 
204  if (!pcmk_is_set(scheduler->flags, flag)) {
205  result = xpath_search(scheduler->input, xpath);
206      if (result && (numXpathResults(result) > 0)) {
207          pcmk__set_scheduler_flags(scheduler, flag);
208      }
209      freeXpathObject(result);
210  }
211 }
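/* set_if_xpath() is used below in unpack_config(), for example to enable
 * unfencing before the resources section has been unpacked:
 *
 *   set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
 *                scheduler);
 */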
212 
213 gboolean
214 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
215 {
216  const char *value = NULL;
217  guint interval_ms = 0U;
218  GHashTable *config_hash = pcmk__strkey_table(free, free);
219 
220  pe_rule_eval_data_t rule_data = {
221  .node_hash = NULL,
222  .now = scheduler->now,
223  .match_data = NULL,
224  .rsc_data = NULL,
225  .op_data = NULL
226  };
227 
228  scheduler->config_hash = config_hash;
229 
230     pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
231                                config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
232                                FALSE, scheduler);
233 
234  pcmk__validate_cluster_options(config_hash);
235 
236     set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
237                     pcmk_sched_probe_resources);
238     if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
239         crm_info("Startup probes: disabled (dangerous)");
240  }
241 
242  value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
243  if (value && crm_is_true(value)) {
244  crm_info("Watchdog-based self-fencing will be performed via SBD if "
245  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
246              " is nonzero");
247         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_fencing);
248     }
249 
250  /* Set certain flags via xpath here, so they can be used before the relevant
251  * configuration sections are unpacked.
252  */
253     set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
254                  scheduler);
255 
256  value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
257  pcmk_parse_interval_spec(value, &interval_ms);
258 
259  if (interval_ms >= INT_MAX) {
260  scheduler->stonith_timeout = INT_MAX;
261  } else {
262  scheduler->stonith_timeout = (int) interval_ms;
263  }
264  crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
265 
266     set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
267                     pcmk_sched_fencing_enabled);
268     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
269         crm_debug("STONITH of failed nodes is enabled");
270  } else {
271  crm_debug("STONITH of failed nodes is disabled");
272  }
273 
274     scheduler->stonith_action = pcmk__cluster_option(config_hash,
275                                                      PCMK_OPT_STONITH_ACTION);
276     if (pcmk__str_eq(scheduler->stonith_action, PCMK__ACTION_POWEROFF, pcmk__str_none)) {
277         pcmk__warn_once(pcmk__wo_poweroff,
278                         "Support for " PCMK_OPT_STONITH_ACTION " of "
279  "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
280  "removed in a future release "
281                         "(use '" PCMK_ACTION_OFF "' instead)");
282         scheduler->stonith_action = PCMK_ACTION_OFF;
283     }
284  crm_trace("STONITH will %s nodes", scheduler->stonith_action);
285 
286     set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
287                     pcmk_sched_concurrent_fencing);
288     if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
289         crm_debug("Concurrent fencing is enabled");
290  } else {
291  crm_debug("Concurrent fencing is disabled");
292  }
293 
294     value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
295     if (value) {
296  pcmk_parse_interval_spec(value, &interval_ms);
297  scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
298         crm_trace("Priority fencing delay is %ds",
299                   scheduler->priority_fencing_delay);
300     }
301 
302     set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
303                     pcmk_sched_stop_all);
304     crm_debug("Stop all active resources: %s",
305  pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
306 
307     set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
308                     pcmk_sched_symmetric_cluster);
309     if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
310         crm_debug("Cluster is symmetric: resources can run anywhere by default");
311  }
312 
313  value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
314 
315     if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
316         scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
317 
318     } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
319         scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
320 
321     } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
322         scheduler->no_quorum_policy = pcmk_no_quorum_demote;
323 
324     } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) {
325         if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
326             int do_panic = 0;
327 
328             crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
329                                   &do_panic);
330             if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
331                 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
332             } else {
333                 crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
334                            " to 'stop': cluster has never had quorum");
335                 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
336             }
337         } else {
338             pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
339                              " to 'stop' because fencing is disabled");
340             scheduler->no_quorum_policy = pcmk_no_quorum_stop;
341         }
342 
343     } else {
344         scheduler->no_quorum_policy = pcmk_no_quorum_stop;
345     }
346 
347     switch (scheduler->no_quorum_policy) {
348         case pcmk_no_quorum_freeze:
349             crm_debug("On loss of quorum: Freeze resources");
350             break;
351         case pcmk_no_quorum_stop:
352             crm_debug("On loss of quorum: Stop ALL resources");
353             break;
354         case pcmk_no_quorum_demote:
355             crm_debug("On loss of quorum: "
356                       "Demote promotable resources and stop other resources");
357             break;
358         case pcmk_no_quorum_fence:
359             crm_notice("On loss of quorum: Fence all remaining nodes");
360             break;
361         case pcmk_no_quorum_ignore:
362             crm_notice("On loss of quorum: Ignore");
363             break;
364     }
365 
366     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
367                     pcmk_sched_stop_removed_resources);
368     if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
369         crm_trace("Orphan resources are stopped");
370  } else {
371  crm_trace("Orphan resources are ignored");
372  }
373 
374     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
375                     pcmk_sched_cancel_removed_actions);
376     if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
377         crm_trace("Orphan resource actions are stopped");
378  } else {
379  crm_trace("Orphan resource actions are ignored");
380  }
381 
382     value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
383     if (value != NULL) {
384         if (crm_is_true(value)) {
385             pcmk__set_scheduler_flags(scheduler, pcmk_sched_remove_after_stop);
386             pcmk__warn_once(pcmk__wo_remove_after,
387                             "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
388                             " cluster property is deprecated and will be "
389                             "removed in a future release");
390         } else {
391             pcmk__clear_scheduler_flags(scheduler,
392                                         pcmk_sched_remove_after_stop);
393         }
394  }
395 
396     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
397                     pcmk_sched_in_maintenance);
398     crm_trace("Maintenance mode: %s",
399  pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
400 
401     set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
402                     pcmk_sched_start_failure_fatal);
403     if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
404         crm_trace("Start failures are always fatal");
405  } else {
406  crm_trace("Start failures are handled by failcount");
407  }
408 
409     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
410         set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
411                         pcmk_sched_startup_fencing);
412     }
413     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
414         crm_trace("Unseen nodes will be fenced");
415     } else {
416         pcmk__warn_once(pcmk__wo_blind,
417                         "Blind faith: not fencing unseen nodes");
418  }
419 
420     pe__unpack_node_health_scores(scheduler);
421 
422     scheduler->placement_strategy =
423         pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
424     crm_trace("Placement strategy: %s", scheduler->placement_strategy);
425 
426     set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
427                     pcmk_sched_shutdown_lock);
428     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
429         value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
430         pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock));
431         scheduler->shutdown_lock /= 1000;
432         crm_trace("Resources will be locked to nodes that were cleanly "
433                   "shut down (locks expire after %s)",
434                   pcmk__readable_interval(scheduler->shutdown_lock * 1000));
435     } else {
436  crm_trace("Resources will not be locked to nodes that were cleanly "
437  "shut down");
438  }
439 
440     value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
441     pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout));
442     scheduler->node_pending_timeout /= 1000;
443     if (scheduler->node_pending_timeout == 0) {
444         crm_trace("Do not fence pending nodes");
445     } else {
446         crm_trace("Fence pending nodes after %s",
447                   pcmk__readable_interval(scheduler->node_pending_timeout
448                                           * 1000));
449  }
450 
451  return TRUE;
452 }
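/* Illustrative only: the options unpacked above come from CIB XML like the
 * following abridged crm_config section (nvpair IDs here are hypothetical):
 *
 *   <crm_config>
 *     <cluster_property_set id="cib-bootstrap-options">
 *       <nvpair id="opts-stonith-enabled" name="stonith-enabled" value="true"/>
 *       <nvpair id="opts-no-quorum-policy" name="no-quorum-policy" value="stop"/>
 *     </cluster_property_set>
 *   </crm_config>
 */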
453 
454 pcmk_node_t *
455 pe_create_node(const char *id, const char *uname, const char *type,
456  const char *score, pcmk_scheduler_t *scheduler)
457 {
458  pcmk_node_t *new_node = NULL;
459 
460  if (pcmk_find_node(scheduler, uname) != NULL) {
461  pcmk__config_warn("More than one node entry has name '%s'", uname);
462  }
463 
464  new_node = calloc(1, sizeof(pcmk_node_t));
465  if (new_node == NULL) {
466  pcmk__sched_err("Could not allocate memory for node %s", uname);
467  return NULL;
468  }
469 
470  new_node->weight = char2score(score);
471  new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
472 
473  if (new_node->details == NULL) {
474  free(new_node);
475  pcmk__sched_err("Could not allocate memory for node %s", uname);
476  return NULL;
477  }
478 
479  crm_trace("Creating node for entry %s/%s", uname, id);
480  new_node->details->id = id;
481  new_node->details->uname = uname;
482  new_node->details->online = FALSE;
483  new_node->details->shutdown = FALSE;
484  new_node->details->rsc_discovery_enabled = TRUE;
485  new_node->details->running_rsc = NULL;
486  new_node->details->data_set = scheduler;
487 
488     if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
489                      pcmk__str_null_matches|pcmk__str_casei)) {
490         new_node->details->type = pcmk_node_variant_cluster;
491 
492     } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
493         new_node->details->type = pcmk_node_variant_remote;
494         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_remote_nodes);
495 
496  } else {
497  /* @COMPAT 'ping' is the default for backward compatibility, but it
498  * should be changed to 'member' at a compatibility break
499  */
500  if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
501  pcmk__config_warn("Node %s has unrecognized type '%s', "
502  "assuming '" PCMK__VALUE_PING "'",
503  pcmk__s(uname, "without name"), type);
504         }
505         pcmk__warn_once(pcmk__wo_ping_node,
506                         "Support for nodes of type '" PCMK__VALUE_PING "' "
507  "(such as %s) is deprecated and will be removed in a "
508  "future release",
509  pcmk__s(uname, "unnamed node"));
510  new_node->details->type = node_ping;
511  }
512 
513  new_node->details->attrs = pcmk__strkey_table(free, free);
514 
515  if (pcmk__is_pacemaker_remote_node(new_node)) {
516  pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
517  } else {
518  pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
519  }
520 
521  new_node->details->utilization = pcmk__strkey_table(free, free);
522     new_node->details->digest_cache = pcmk__strkey_table(free,
523                                                          pe__free_digests);
524 
525     scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
526                                             pe__cmp_node_name);
527     return new_node;
528 }
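/* Illustrative only: pe_create_node() is typically fed entries from the CIB
 * nodes section, such as (values hypothetical):
 *
 *   <node id="101" uname="node1" type="member"/>
 *
 * where type may be "member", "remote", or the deprecated "ping".
 */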
529 
530 static const char *
531 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
532 {
533  xmlNode *attr_set = NULL;
534  xmlNode *attr = NULL;
535 
536  const char *container_id = pcmk__xe_id(xml_obj);
537  const char *remote_name = NULL;
538  const char *remote_server = NULL;
539  const char *remote_port = NULL;
540  const char *connect_timeout = "60s";
541     const char *remote_allow_migrate = NULL;
542  const char *is_managed = NULL;
543 
544  for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
545  attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
546 
547  if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
548  continue;
549  }
550 
551  for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
552  attr != NULL; attr = pcmk__xe_next(attr)) {
553 
554  const char *value = crm_element_value(attr, PCMK_XA_VALUE);
555  const char *name = crm_element_value(attr, PCMK_XA_NAME);
556 
557  if (name == NULL) { // Sanity
558  continue;
559  }
560 
561  if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
562  remote_name = value;
563 
564  } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
565  remote_server = value;
566 
567  } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
568  remote_port = value;
569 
570  } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
571  connect_timeout = value;
572 
573  } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
574  remote_allow_migrate = value;
575 
576  } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
577  is_managed = value;
578  }
579  }
580  }
581 
582  if (remote_name == NULL) {
583  return NULL;
584  }
585 
586  if (pe_find_resource(data->resources, remote_name) != NULL) {
587  return NULL;
588  }
589 
590  pe_create_remote_xml(parent, remote_name, container_id,
591  remote_allow_migrate, is_managed,
592  connect_timeout, remote_server, remote_port);
593  return remote_name;
594 }
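/* Illustrative only: a guest node is requested with meta-attributes on an
 * ordinary primitive, e.g. (abridged; names and address hypothetical):
 *
 *   <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *     <meta_attributes id="vm1-meta">
 *       <nvpair id="vm1-remote-node" name="remote-node" value="guest1"/>
 *       <nvpair id="vm1-remote-addr" name="remote-addr" value="192.168.122.10"/>
 *     </meta_attributes>
 *   </primitive>
 *
 * for which this function generates an implicit ocf:pacemaker:remote
 * primitive named "guest1" for the connection.
 */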
595 
596 static void
597 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
598 {
599  if ((new_node->details->type == pcmk_node_variant_remote)
600  && (new_node->details->remote_rsc == NULL)) {
601  /* Ignore fencing for remote nodes that don't have a connection resource
602  * associated with them. This happens when remote node entries get left
603  * in the nodes section after the connection resource is removed.
604  */
605  return;
606  }
607 
608     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
609         // All nodes are unclean until we've seen their status entry
610  new_node->details->unclean = TRUE;
611 
612  } else {
613  // Blind faith ...
614  new_node->details->unclean = FALSE;
615  }
616 
617  /* We need to be able to determine if a node's status section
618  * exists or not separate from whether the node is unclean. */
619  new_node->details->unseen = TRUE;
620 }
621 
622 gboolean
623 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
624 {
625  xmlNode *xml_obj = NULL;
626  pcmk_node_t *new_node = NULL;
627  const char *id = NULL;
628  const char *uname = NULL;
629  const char *type = NULL;
630  const char *score = NULL;
631 
632  for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
633  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
634 
635  if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
636  new_node = NULL;
637 
638             id = crm_element_value(xml_obj, PCMK_XA_ID);
639             uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
640             type = crm_element_value(xml_obj, PCMK_XA_TYPE);
641  score = crm_element_value(xml_obj, PCMK_XA_SCORE);
642  crm_trace("Processing node %s/%s", uname, id);
643 
644  if (id == NULL) {
645  pcmk__config_err("Ignoring <" PCMK_XE_NODE
646  "> entry in configuration without id");
647  continue;
648  }
649  new_node = pe_create_node(id, uname, type, score, scheduler);
650 
651  if (new_node == NULL) {
652  return FALSE;
653  }
654 
655  handle_startup_fencing(scheduler, new_node);
656 
657  add_node_attrs(xml_obj, new_node, FALSE, scheduler);
658 
659  crm_trace("Done with node %s",
660  crm_element_value(xml_obj, PCMK_XA_UNAME));
661  }
662  }
663 
664  if (scheduler->localhost
665  && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) {
666         crm_info("Creating a fake local node");
667         pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
668                        scheduler);
669  }
670 
671  return TRUE;
672 }
673 
674 static void
675 setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
676 {
677  const char *container_id = NULL;
678 
679  if (rsc->children) {
680  g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
681  return;
682  }
683 
684  container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
685     if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
686         pcmk_resource_t *container = pe_find_resource(scheduler->resources,
687                                                       container_id);
688 
689  if (container) {
690             rsc->container = container;
691             pcmk__set_rsc_flags(container, pcmk_rsc_has_filler);
692             container->fillers = g_list_append(container->fillers, rsc);
693  pcmk__rsc_trace(rsc, "Resource %s's container is %s",
694  rsc->id, container_id);
695  } else {
696  pcmk__config_err("Resource %s: Unknown resource container (%s)",
697  rsc->id, container_id);
698  }
699  }
700 }
701 
702 gboolean
703 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
704 {
705  xmlNode *xml_obj = NULL;
706 
707  /* Create remote nodes and guest nodes from the resource configuration
708  * before unpacking resources.
709  */
710  for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
711  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
712 
713  const char *new_node_id = NULL;
714 
715  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
716  * primitives.
717  */
718  if (xml_contains_remote_node(xml_obj)) {
719  new_node_id = pcmk__xe_id(xml_obj);
720  /* The pcmk_find_node() check ensures we don't iterate over an
721  * expanded node that has already been added to the node list
722  */
723  if (new_node_id
724  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
725  crm_trace("Found remote node %s defined by resource %s",
726  new_node_id, pcmk__xe_id(xml_obj));
727  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
728  NULL, scheduler);
729  }
730  continue;
731  }
732 
733  /* Check for guest nodes, which are defined by special meta-attributes
734  * of a primitive of any type (for example, VirtualDomain or Xen).
735  */
736  if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
737  /* This will add an ocf:pacemaker:remote primitive to the
738  * configuration for the guest node's connection, to be unpacked
739  * later.
740  */
741  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
742  scheduler);
743  if (new_node_id
744  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
745  crm_trace("Found guest node %s in resource %s",
746  new_node_id, pcmk__xe_id(xml_obj));
747  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
748  NULL, scheduler);
749  }
750  continue;
751  }
752 
753  /* Check for guest nodes inside a group. Clones are currently not
754  * supported as guest nodes.
755  */
756  if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
757  xmlNode *xml_obj2 = NULL;
758  for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
759  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
760 
761  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
762  scheduler);
763 
764  if (new_node_id
765  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
766  crm_trace("Found guest node %s in resource %s inside group %s",
767  new_node_id, pcmk__xe_id(xml_obj2),
768  pcmk__xe_id(xml_obj));
769  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
770  NULL, scheduler);
771  }
772  }
773  }
774  }
775  return TRUE;
776 }
777 
778 /* Call this after all the nodes and resources have been
779  * unpacked, but before the status section is read.
780  *
781  * A remote node's online status is reflected by the state
782  * of the remote node's connection resource. We need to link
783  * the remote node to this connection resource so we can have
784  * easy access to the connection resource during the scheduler calculations.
785  */
786 static void
787 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
788 {
789  pcmk_node_t *remote_node = NULL;
790 
791  if (new_rsc->is_remote_node == FALSE) {
792  return;
793  }
794 
795     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
796         /* remote_nodes and remote_resources are not linked in quick location calculations */
797  return;
798  }
799 
800  remote_node = pcmk_find_node(scheduler, new_rsc->id);
801  CRM_CHECK(remote_node != NULL, return);
802 
803  pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
804  new_rsc->id, pcmk__node_name(remote_node));
805  remote_node->details->remote_rsc = new_rsc;
806 
807  if (new_rsc->container == NULL) {
808  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
809  * the same as is done for cluster nodes.
810  */
811  handle_startup_fencing(scheduler, remote_node);
812 
813  } else {
814  /* pe_create_node() marks the new node as "remote" or "cluster"; now
815  * that we know the node is a guest node, update it correctly.
816  */
817  pcmk__insert_dup(remote_node->details->attrs,
818  CRM_ATTR_KIND, "container");
819  }
820 }
821 
822 static void
823 destroy_tag(gpointer data)
824 {
825  pcmk_tag_t *tag = data;
826 
827  if (tag) {
828  free(tag->id);
829  g_list_free_full(tag->refs, free);
830  free(tag);
831  }
832 }
833 
846 gboolean
847 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
848 {
849  xmlNode *xml_obj = NULL;
850  GList *gIter = NULL;
851 
852  scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
853 
854  for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
855  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
856 
857  pcmk_resource_t *new_rsc = NULL;
858  const char *id = pcmk__xe_id(xml_obj);
859 
860  if (pcmk__str_empty(id)) {
861  pcmk__config_err("Ignoring <%s> resource without ID",
862  xml_obj->name);
863  continue;
864  }
865 
866  if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
867  if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
868  NULL, NULL) == FALSE) {
869                 /* Record the template's ID for the knowledge of its existence anyway. */
870                 g_hash_table_insert(scheduler->template_rsc_sets, pcmk__str_copy(id), NULL);
871             }
872  continue;
873  }
874 
875  crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
876  if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
877  scheduler) == pcmk_rc_ok) {
878  scheduler->resources = g_list_append(scheduler->resources, new_rsc);
879  pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
880 
881  } else {
882  pcmk__config_err("Ignoring <%s> resource '%s' "
883  "because configuration is invalid",
884  xml_obj->name, id);
885  }
886  }
887 
888  for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
889  pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
890 
891  setup_container(rsc, scheduler);
892  link_rsc2remotenode(scheduler, rsc);
893  }
894 
895     scheduler->resources = g_list_sort(scheduler->resources,
896                                        pe__cmp_rsc_priority);
897     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
898         /* Ignore */
899 
900     } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
901                && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
902 
903  pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
904  pcmk__config_err("Either configure some or disable STONITH with the "
905  PCMK_OPT_STONITH_ENABLED " option");
906  pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
907  }
908 
909  return TRUE;
910 }
911 
921 void
922 pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
923 {
924  xmlNode *xml_obj = NULL;
925  int id = 0;
926 
927  for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL);
928  xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) {
929 
930  crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id);
931 
932  // Ensure an ID was given
933  if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
934  pcmk__config_warn("Ignoring registration for topology level without ID");
935  continue;
936  }
937 
938  // Ensure level ID is in allowed range
939  if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
940  pcmk__config_warn("Ignoring topology registration with invalid level %d",
941  id);
942  continue;
943  }
944 
945  }
946 }
947 
948 gboolean
949 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
950 {
951  xmlNode *xml_tag = NULL;
952 
953  scheduler->tags = pcmk__strkey_table(free, destroy_tag);
954 
955  for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
956  xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
957 
958  xmlNode *xml_obj_ref = NULL;
959  const char *tag_id = pcmk__xe_id(xml_tag);
960 
961  if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
962  continue;
963  }
964 
965  if (tag_id == NULL) {
966  pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
967  (const char *) xml_tag->name);
968  continue;
969  }
970 
971  for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
972  xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
973 
974  const char *obj_ref = pcmk__xe_id(xml_obj_ref);
975 
976  if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
977  continue;
978  }
979 
980  if (obj_ref == NULL) {
981  pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
982  xml_obj_ref->name, tag_id);
983  continue;
984  }
985 
986  if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
987  return FALSE;
988  }
989  }
990  }
991 
992  return TRUE;
993 }
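/* Illustrative only: the configuration section parsed here looks like
 * (IDs hypothetical):
 *
 *   <tags>
 *     <tag id="all-db">
 *       <obj_ref id="db-rsc1"/>
 *       <obj_ref id="db-rsc2"/>
 *     </tag>
 *   </tags>
 */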
994 
995 /* The ticket state section:
996  * "/cib/status/tickets/ticket_state" */
997 static gboolean
998 unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
999 {
1000  const char *ticket_id = NULL;
1001  const char *granted = NULL;
1002  const char *last_granted = NULL;
1003  const char *standby = NULL;
1004  xmlAttrPtr xIter = NULL;
1005 
1006  pcmk_ticket_t *ticket = NULL;
1007 
1008  ticket_id = pcmk__xe_id(xml_ticket);
1009  if (pcmk__str_empty(ticket_id)) {
1010  return FALSE;
1011  }
1012 
1013  crm_trace("Processing ticket state for %s", ticket_id);
1014 
1015  ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
1016  if (ticket == NULL) {
1017  ticket = ticket_new(ticket_id, scheduler);
1018  if (ticket == NULL) {
1019  return FALSE;
1020  }
1021  }
1022 
1023  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1024  const char *prop_name = (const char *)xIter->name;
1025  const char *prop_value = pcmk__xml_attr_value(xIter);
1026 
1027  if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1028  continue;
1029  }
1030  pcmk__insert_dup(ticket->state, prop_name, prop_value);
1031  }
1032 
1033  granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1034  if (granted && crm_is_true(granted)) {
1035  ticket->granted = TRUE;
1036  crm_info("We have ticket '%s'", ticket->id);
1037  } else {
1038  ticket->granted = FALSE;
1039  crm_info("We do not have ticket '%s'", ticket->id);
1040  }
1041 
1042  last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1043  if (last_granted) {
1044  long long last_granted_ll;
1045 
1046  pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1047  ticket->last_granted = (time_t) last_granted_ll;
1048  }
1049 
1050  standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1051  if (standby && crm_is_true(standby)) {
1052  ticket->standby = TRUE;
1053  if (ticket->granted) {
1054  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1055  }
1056  } else {
1057  ticket->standby = FALSE;
1058  }
1059 
1060  crm_trace("Done with ticket state for %s", ticket_id);
1061 
1062  return TRUE;
1063 }
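/* Illustrative only: a ticket state entry in the CIB status section looks
 * something like (ID and timestamp hypothetical):
 *
 *   <ticket_state id="ticketA" granted="true" last-granted="1720000000"
 *                 standby="false"/>
 *
 * Any additional attributes are carried along in ticket->state.
 */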
1064 
1065 static gboolean
1066 unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1067 {
1068  xmlNode *xml_obj = NULL;
1069 
1070  for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
1071  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1072 
1073  if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
1074  continue;
1075  }
1076  unpack_ticket_state(xml_obj, scheduler);
1077  }
1078 
1079  return TRUE;
1080 }
1081 
1082 static void
1083 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1084                            pcmk_scheduler_t *scheduler)
1085 {
1086  const char *discovery = NULL;
1087  const xmlNode *attrs = NULL;
1088  pcmk_resource_t *rsc = NULL;
1089 
1090  if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1091  return;
1092  }
1093 
1094  if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1095  return;
1096  }
1097  crm_trace("Processing Pacemaker Remote node %s",
1098  pcmk__node_name(this_node));
1099 
1100     pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
1101                        &(this_node->details->remote_maintenance), 0);
1102 
1103  rsc = this_node->details->remote_rsc;
1104  if (this_node->details->remote_requires_reset == FALSE) {
1105  this_node->details->unclean = FALSE;
1106  this_node->details->unseen = FALSE;
1107     }
1108     attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1109                                  NULL);
1110  add_node_attrs(attrs, this_node, TRUE, scheduler);
1111 
1112  if (pe__shutdown_requested(this_node)) {
1113  crm_info("%s is shutting down", pcmk__node_name(this_node));
1114  this_node->details->shutdown = TRUE;
1115  }
1116 
1117     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1118                                     pcmk__rsc_node_current))) {
1119         crm_info("%s is in standby mode", pcmk__node_name(this_node));
1120  this_node->details->standby = TRUE;
1121  }
1122 
1123     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1124                                     pcmk__rsc_node_current))
1125         || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1126  crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1127  this_node->details->maintenance = TRUE;
1128  }
1129 
1130     discovery = pcmk__node_attr(this_node,
1131                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1132                                 NULL, pcmk__rsc_node_current);
1133     if ((discovery != NULL) && !crm_is_true(discovery)) {
1134         pcmk__warn_once(pcmk__wo_rdisc_enabled,
1135                         "Support for the "
1136                         PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1137                         " node attribute is deprecated and will be removed"
1138                         " (and behave as 'true') in a future release.");
1139 
1140         if (pcmk__is_remote_node(this_node)
1141             && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1142             pcmk__config_warn("Ignoring "
1143                               PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1144                               " attribute on Pacemaker Remote node %s"
1145  " because fencing is disabled",
1146  pcmk__node_name(this_node));
1147  } else {
1148  /* This is either a remote node with fencing enabled, or a guest
1149  * node. We don't care whether fencing is enabled when fencing guest
1150  * nodes, because they are "fenced" by recovering their containing
1151  * resource.
1152  */
1153  crm_info("%s has resource discovery disabled",
1154  pcmk__node_name(this_node));
1155  this_node->details->rsc_discovery_enabled = FALSE;
1156  }
1157  }
1158 }
1159 
1168 static void
1169 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1170                             pcmk_scheduler_t *scheduler)
1171 {
1172  const char *discovery = NULL;
1173     const xmlNode *attrs = pcmk__xe_first_child(state,
1174                                                 PCMK__XE_TRANSIENT_ATTRIBUTES,
1175                                                 NULL, NULL);
1176 
1177  add_node_attrs(attrs, node, TRUE, scheduler);
1178 
1179     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1180                                     pcmk__rsc_node_current))) {
1181         crm_info("%s is in standby mode", pcmk__node_name(node));
1182  node->details->standby = TRUE;
1183  }
1184 
1185     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1186                                     pcmk__rsc_node_current))) {
1187         crm_info("%s is in maintenance mode", pcmk__node_name(node));
1188  node->details->maintenance = TRUE;
1189  }
1190 
1191     discovery = pcmk__node_attr(node,
1192                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1193                                 NULL, pcmk__rsc_node_current);
1194     if ((discovery != NULL) && !crm_is_true(discovery)) {
1195         pcmk__config_warn("Ignoring "
1196                           PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1197  " attribute for %s because disabling resource"
1198  " discovery is not allowed for cluster nodes",
1199  pcmk__node_name(node));
1200  }
1201 }
1202 
1215 static void
1216 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1217 {
1218  const char *id = NULL;
1219  const char *uname = NULL;
1220  pcmk_node_t *this_node = NULL;
1221 
1222  id = crm_element_value(state, PCMK_XA_ID);
1223  if (id == NULL) {
1224  pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1225  PCMK_XA_ID);
1226  crm_log_xml_info(state, "missing-id");
1227  return;
1228  }
1229 
1230     uname = crm_element_value(state, PCMK_XA_UNAME);
1231     if (uname == NULL) {
1232  /* If a joining peer makes the cluster acquire the quorum from corosync
1233  * meanwhile it has not joined CPG membership of pacemaker-controld yet,
1234  * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have
1235  * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and
1236  * wait for it to join CPG.
1237  */
1238  crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1239  "without " PCMK_XA_UNAME,
1240  id);
1241  }
1242 
1243  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1244  if (this_node == NULL) {
1245  crm_notice("Ignoring recorded state for removed node with name %s and "
1246  PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1247  return;
1248  }
1249 
1250  if (pcmk__is_pacemaker_remote_node(this_node)) {
1251  /* We can't determine the online status of Pacemaker Remote nodes until
1252  * after all resource history has been unpacked. In this first pass, we
1253  * do need to mark whether the node has been fenced, as this plays a
1254  * role during unpacking cluster node resource state.
1255          */
1256         pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
1257                            &(this_node->details->remote_was_fenced), 0);
1258  return;
1259  }
1260 
1261  unpack_transient_attributes(state, this_node, scheduler);
1262 
1263  /* Provisionally mark this cluster node as clean. We have at least seen it
1264  * in the current cluster's lifetime.
1265  */
1266  this_node->details->unclean = FALSE;
1267  this_node->details->unseen = FALSE;
1268 
1269  crm_trace("Determining online status of cluster node %s (id %s)",
1270  pcmk__node_name(this_node), id);
1271  determine_online_status(state, this_node, scheduler);
1272 
1273     if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
1274         && this_node->details->online
1275         && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1276         /* Everything else should flow from this automatically
1277  * (at least until the scheduler becomes able to migrate off
1278  * healthy resources)
1279  */
1280  pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1281  FALSE);
1282  }
1283 }
1284 
1302 static int
1303 unpack_node_history(const xmlNode *status, bool fence,
1304                     pcmk_scheduler_t *scheduler)
1305 {
1306  int rc = pcmk_rc_ok;
1307 
1308  // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1309  for (const xmlNode *state = pcmk__xe_first_child(status,
1310  PCMK__XE_NODE_STATE, NULL,
1311  NULL);
1312  state != NULL; state = pcmk__xe_next_same(state)) {
1313 
1314  const char *id = pcmk__xe_id(state);
1315  const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1316  pcmk_node_t *this_node = NULL;
1317 
1318  if ((id == NULL) || (uname == NULL)) {
1319  // Warning already logged in first pass through status section
1320  crm_trace("Not unpacking resource history from malformed "
1321  PCMK__XE_NODE_STATE " without id and/or uname");
1322  continue;
1323  }
1324 
1325  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1326  if (this_node == NULL) {
1327  // Warning already logged in first pass through status section
1328  crm_trace("Not unpacking resource history for node %s because "
1329  "no longer in configuration", id);
1330  continue;
1331  }
1332 
1333  if (this_node->details->unpacked) {
1334  crm_trace("Not unpacking resource history for node %s because "
1335  "already unpacked", id);
1336  continue;
1337  }
1338 
1339  if (fence) {
1340  // We're processing all remaining nodes
1341 
1342  } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1343  /* We can unpack a guest node's history only after we've unpacked
1344  * other resource history to the point that we know that the node's
1345  * connection and containing resource are both up.
1346  */
1347  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1348 
1349  if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1350  || (rsc->container->role != pcmk_role_started)) {
1351  crm_trace("Not unpacking resource history for guest node %s "
1352  "because container and connection are not known to "
1353  "be up", id);
1354  continue;
1355  }
1356 
1357  } else if (pcmk__is_remote_node(this_node)) {
1358  /* We can unpack a remote node's history only after we've unpacked
1359  * other resource history to the point that we know that the node's
1360  * connection is up, with the exception of when shutdown locks are
1361  * in use.
1362  */
1363  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1364 
1365             if ((rsc == NULL)
1366                 || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
1367                     && (rsc->role != pcmk_role_started))) {
1368  crm_trace("Not unpacking resource history for remote node %s "
1369  "because connection is not known to be up", id);
1370  continue;
1371  }
1372 
1373  /* If fencing and shutdown locks are disabled and we're not processing
1374  * unseen nodes, then we don't want to unpack offline nodes until online
1375  * nodes have been unpacked. This allows us to number active clone
1376  * instances first.
1377  */
1378         } else if (!pcmk_any_flags_set(scheduler->flags,
1379                                        pcmk_sched_fencing_enabled
1380                                        |pcmk_sched_shutdown_lock)
1381                    && !this_node->details->online) {
1382  crm_trace("Not unpacking resource history for offline "
1383  "cluster node %s", id);
1384  continue;
1385  }
1386 
1387  if (pcmk__is_pacemaker_remote_node(this_node)) {
1388  determine_remote_online_status(scheduler, this_node);
1389  unpack_handle_remote_attrs(this_node, state, scheduler);
1390  }
1391 
1392  crm_trace("Unpacking resource history for %snode %s",
1393  (fence? "unseen " : ""), id);
1394 
1395  this_node->details->unpacked = TRUE;
1396  unpack_node_lrm(this_node, state, scheduler);
1397 
1398  rc = EAGAIN; // Other node histories might depend on this one
1399  }
1400  return rc;
1401 }
1402 
1403 /* remove nodes that are down, stopping */
1404 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1405 /* anything else? */
1406 gboolean
1407 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1408 {
1409  xmlNode *state = NULL;
1410 
1411  crm_trace("Beginning unpack");
1412 
1413     if (scheduler->tickets == NULL) {
1414         scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
1415     }
1416 
1417  for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1418  state = pcmk__xe_next(state)) {
1419 
1420  if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1421  unpack_tickets_state((xmlNode *) state, scheduler);
1422 
1423  } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1424  unpack_node_state(state, scheduler);
1425  }
1426  }
1427 
1428  while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1429  crm_trace("Another pass through node resource histories is needed");
1430  }
1431 
1432  // Now catch any nodes we didn't see
1433     unpack_node_history(status,
1434                         pcmk_is_set(scheduler->flags,
1435                                     pcmk_sched_fencing_enabled),
1436                         scheduler);
1437 
1438  /* Now that we know where resources are, we can schedule stops of containers
1439  * with failed bundle connections
1440  */
1441  if (scheduler->stop_needed != NULL) {
1442  for (GList *item = scheduler->stop_needed; item; item = item->next) {
1443  pcmk_resource_t *container = item->data;
1444  pcmk_node_t *node = pcmk__current_node(container);
1445 
1446  if (node) {
1447  stop_action(container, node, FALSE);
1448  }
1449  }
1450  g_list_free(scheduler->stop_needed);
1451  scheduler->stop_needed = NULL;
1452  }
1453 
1454  /* Now that we know status of all Pacemaker Remote connections and nodes,
1455  * we can stop connections for node shutdowns, and check the online status
1456  * of remote/guest nodes that didn't have any node history to unpack.
1457  */
1458  for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1459  pcmk_node_t *this_node = gIter->data;
1460 
1461  if (!pcmk__is_pacemaker_remote_node(this_node)) {
1462  continue;
1463  }
1464  if (this_node->details->shutdown
1465             && (this_node->details->remote_rsc != NULL)) {
1466             pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
1467                               "remote shutdown");
1468  }
1469  if (!this_node->details->unpacked) {
1470  determine_remote_online_status(scheduler, this_node);
1471  }
1472  }
1473 
1474  return TRUE;
1475 }
1476 
1488 static long long
1489 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1490 {
1491  const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1492  int member = 0;
1493 
1494  if (member_time == NULL) {
1495  return -1LL;
1496 
1497  } else if (crm_str_to_boolean(member_time, &member) == 1) {
1498  /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1499  * recorded as a boolean for a DC < 2.1.7, or the node is pending
1500  * shutdown and has left the CPG, in which case it was set to 1 to avoid
1501  * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1502  *
1503  * We return the effective time for in_ccm=1 because what's important to
1504  * avoid fencing is that effective time minus this value is less than
1505  * the pending node timeout.
1506  */
1507  return member? (long long) get_effective_time(scheduler) : 0LL;
1508 
1509  } else {
1510  long long when_member = 0LL;
1511 
1512  if ((pcmk__scan_ll(member_time, &when_member,
1513  0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1514  crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1515  " in " PCMK__XE_NODE_STATE " entry", member_time);
1516  return -1LL;
1517  }
1518  return when_member;
1519  }
1520 }
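/* Illustrative examples of the values handled above:
 *
 *   in_ccm="false"       (DC < 2.1.7)  -> 0 (not a member)
 *   in_ccm="true"        (DC < 2.1.7)  -> current effective time
 *   in_ccm="1720000000"  (DC >= 2.1.7) -> 1720000000 (member since then)
 *   in_ccm missing                     -> -1 (membership unknown)
 */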
1521 
1531 static long long
1532 unpack_node_online(const xmlNode *node_state)
1533 {
1534  const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1535 
1536  // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1537     if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1538                      pcmk__str_casei|pcmk__str_null_matches)) {
1539         return 0LL;
1540 
1541  } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1542  return 1LL;
1543 
1544  } else {
1545  long long when_online = 0LL;
1546 
1547  if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1548  || (when_online < 0)) {
1549  crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1550  PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1551  return 0LL;
1552  }
1553  return when_online;
1554  }
1555 }
1556 
1566 static bool
1567 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1568 {
1569  long long value = 0LL;
1570  int value_i = 0;
1571  const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1572  NULL, pcmk__rsc_node_current);
1573 
1574  // Value may be boolean or an epoch time
1575  if (crm_str_to_boolean(value_s, &value_i) == 1) {
1576  return (value_i != 0);
1577  }
1578  if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
1579  return (value > 0);
1580  }
1581  crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1582              " node attribute for %s", value_s, pcmk__node_name(node));
1583  return false;
1584 }
1585 
1586 static gboolean
1587 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1588  const xmlNode *node_state,
1589  pcmk_node_t *this_node)
1590 {
1591  gboolean online = FALSE;
1592  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1593  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1594  long long when_member = unpack_node_member(node_state, scheduler);
1595  long long when_online = unpack_node_online(node_state);
1596 
1597  if (when_member <= 0) {
1598  crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1599  ((when_member < 0)? "presumed " : ""));
1600 
1601  } else if (when_online > 0) {
1602  if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1603  online = TRUE;
1604  } else {
1605  crm_debug("Node %s is not ready to run resources: %s",
1606  pcmk__node_name(this_node), join);
1607  }
1608 
1609  } else if (this_node->details->expected_up == FALSE) {
1610  crm_trace("Node %s controller is down: "
1611  "member@%lld online@%lld join=%s expected=%s",
1612  pcmk__node_name(this_node), when_member, when_online,
1613  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1614 
1615  } else {
1616  /* mark it unclean */
1617  pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1618  crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1619  pcmk__node_name(this_node), when_member, when_online,
1620  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1621  }
1622  return online;
1623 }
1624 
1638 static inline bool
1639 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1640  long long when_member, long long when_online)
1641 {
1642  if ((scheduler->node_pending_timeout > 0)
1643  && (when_member > 0) && (when_online <= 0)) {
1644  // There is a timeout on pending nodes, and node is pending
1645 
1646  time_t timeout = when_member + scheduler->node_pending_timeout;
1647 
1648  if (get_effective_time(node->details->data_set) >= timeout) {
1649  return true; // Node has timed out
1650  }
1651 
1652  // Node is pending, but still has time
1653  pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1654  }
1655  return false;
1656 }
1657 
1658 static bool
1659 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1660  const xmlNode *node_state,
1661  pcmk_node_t *this_node)
1662 {
1663  bool termination_requested = unpack_node_terminate(this_node, node_state);
1664  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1665  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1666  long long when_member = unpack_node_member(node_state, scheduler);
1667  long long when_online = unpack_node_online(node_state);
1668 
1669 /*
1670  - PCMK__XA_JOIN ::= member|down|pending|banned
1671  - PCMK_XA_EXPECTED ::= member|down
1672 
1673  @COMPAT with entries recorded for DCs < 2.1.7
1674  - PCMK__XA_IN_CCM ::= true|false
1675  - PCMK_XA_CRMD ::= online|offline
1676 
1677  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1678  - PCMK__XA_IN_CCM ::= <timestamp>|0
1679  Since when node has been a cluster member. A value 0 of means the node is not
1680  a cluster member.
1681 
1682  - PCMK_XA_CRMD ::= <timestamp>|0
1683  Since when peer has been online in CPG. A value 0 means the peer is offline
1684  in CPG.
1685 */
1686 
1687  crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1688  pcmk__node_name(this_node), when_member, when_online,
1689  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1690  (termination_requested? " (termination requested)" : ""));
1691 
1692  if (this_node->details->shutdown) {
1693  crm_debug("%s is shutting down", pcmk__node_name(this_node));
1694 
1695  /* Slightly different criteria since we can't shut down a dead peer */
1696  return (when_online > 0);
1697  }
1698 
1699  if (when_member < 0) {
1700  pe_fence_node(scheduler, this_node,
1701  "peer has not been seen by the cluster", FALSE);
1702  return false;
1703  }
1704 
1705  if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1706  pe_fence_node(scheduler, this_node,
1707  "peer failed Pacemaker membership criteria", FALSE);
1708 
1709  } else if (termination_requested) {
1710  if ((when_member <= 0) && (when_online <= 0)
1711  && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1712  crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1713  return false;
1714  }
1715  pe_fence_node(scheduler, this_node, "fencing was requested", false);
1716 
1717     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1718                             pcmk__str_none)) {
1719 
1720  if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1721  pe_fence_node(scheduler, this_node,
1722  "peer pending timed out on joining the process group",
1723  FALSE);
1724 
1725  } else if ((when_member > 0) || (when_online > 0)) {
1726  crm_info("- %s is not ready to run resources",
1727  pcmk__node_name(this_node));
1728  this_node->details->standby = TRUE;
1729  this_node->details->pending = TRUE;
1730 
1731  } else {
1732  crm_trace("%s is down or still coming up",
1733  pcmk__node_name(this_node));
1734  }
1735 
1736  } else if (when_member <= 0) {
1737  // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1738  pe_fence_node(scheduler, this_node,
1739  "peer is no longer part of the cluster", TRUE);
1740 
1741  } else if (when_online <= 0) {
1742  pe_fence_node(scheduler, this_node,
1743  "peer process is no longer available", FALSE);
1744 
1745  /* Everything is running at this point, now check join state */
1746 
1747  } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1748  crm_info("%s is active", pcmk__node_name(this_node));
1749 
1750  } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1751  CRMD_JOINSTATE_DOWN, NULL)) {
1752  crm_info("%s is not ready to run resources",
1753  pcmk__node_name(this_node));
1754  this_node->details->standby = TRUE;
1755  this_node->details->pending = TRUE;
1756 
1757  } else {
1758  pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1759  FALSE);
1760  }
1761 
1762  return (when_member > 0);
1763 }
1764 
1765 static void
1766 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1767  pcmk_node_t *this_node)
1768 {
1769  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1770  pcmk_resource_t *container = NULL;
1771  pcmk_node_t *host = NULL;
1772 
1773  /* If there is a node state entry for a (former) Pacemaker Remote node
1774  * but no resource creating that node, the node's connection resource will
1775  * be NULL. Consider it an offline remote node in that case.
1776  */
1777  if (rsc == NULL) {
1778  this_node->details->online = FALSE;
1779  goto remote_online_done;
1780  }
1781 
1782  container = rsc->container;
1783 
1784  if (container && pcmk__list_of_1(rsc->running_on)) {
1785  host = rsc->running_on->data;
1786  }
1787 
1788  /* If the resource is currently started, mark it online. */
1789  if (rsc->role == pcmk_role_started) {
1790  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1791  (container? "Guest" : "Remote"), this_node->details->id);
1792  this_node->details->online = TRUE;
1793  }
1794 
1795  /* consider this node shutting down if transitioning start->stop */
1796  if ((rsc->role == pcmk_role_started)
1797  && (rsc->next_role == pcmk_role_stopped)) {
1798 
1799  crm_trace("%s node %s shutting down because connection resource is stopping",
1800  (container? "Guest" : "Remote"), this_node->details->id);
1801  this_node->details->shutdown = TRUE;
1802  }
1803 
1804  /* Now check all the failure conditions. */
1805  if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1806  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1807  this_node->details->id);
1808  this_node->details->online = FALSE;
1809  this_node->details->remote_requires_reset = TRUE;
1810 
1811  } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1812  crm_trace("%s node %s OFFLINE because connection resource failed",
1813  (container? "Guest" : "Remote"), this_node->details->id);
1814  this_node->details->online = FALSE;
1815 
1816  } else if ((rsc->role == pcmk_role_stopped)
1817  || ((container != NULL)
1818  && (container->role == pcmk_role_stopped))) {
1819 
1820  crm_trace("%s node %s OFFLINE because its resource is stopped",
1821  (container? "Guest" : "Remote"), this_node->details->id);
1822  this_node->details->online = FALSE;
1823  this_node->details->remote_requires_reset = FALSE;
1824 
1825  } else if (host && (host->details->online == FALSE)
1826  && host->details->unclean) {
1827  crm_trace("Guest node %s UNCLEAN because host is unclean",
1828  this_node->details->id);
1829  this_node->details->online = FALSE;
1830  this_node->details->remote_requires_reset = TRUE;
1831  }
1832 
1833 remote_online_done:
1834  crm_trace("Remote node %s online=%s",
1835  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1836 }
1837 
1838 static void
1839 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1840                         pcmk_scheduler_t *scheduler)
1841 {
1842  gboolean online = FALSE;
1843  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1844 
1845  CRM_CHECK(this_node != NULL, return);
1846 
1847  this_node->details->shutdown = FALSE;
1848  this_node->details->expected_up = FALSE;
1849 
1850  if (pe__shutdown_requested(this_node)) {
1851  this_node->details->shutdown = TRUE;
1852 
1853  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1854  this_node->details->expected_up = TRUE;
1855  }
1856 
1857  if (this_node->details->type == node_ping) {
1858  this_node->details->unclean = FALSE;
1859  online = FALSE; /* As far as resource management is concerned,
1860  * the node is safely offline.
1861  * Anyone caught abusing this logic will be shot
1862  */
1863 
1864     } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1865         online = determine_online_status_no_fencing(scheduler, node_state,
1866  this_node);
1867 
1868  } else {
1869  online = determine_online_status_fencing(scheduler, node_state,
1870  this_node);
1871  }
1872 
1873  if (online) {
1874  this_node->details->online = TRUE;
1875 
1876  } else {
1877  /* remove node from contention */
1878  this_node->fixed = TRUE; // @COMPAT deprecated and unused
1879  this_node->weight = -PCMK_SCORE_INFINITY;
1880  }
1881 
1882  if (online && this_node->details->shutdown) {
1883  /* don't run resources here */
1884  this_node->fixed = TRUE; // @COMPAT deprecated and unused
1885  this_node->weight = -PCMK_SCORE_INFINITY;
1886  }
1887 
1888  if (this_node->details->type == node_ping) {
1889  crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
1890 
1891  } else if (this_node->details->unclean) {
1892  pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
1893 
1894  } else if (this_node->details->online) {
1895  crm_info("%s is %s", pcmk__node_name(this_node),
1896  this_node->details->shutdown ? "shutting down" :
1897  this_node->details->pending ? "pending" :
1898  this_node->details->standby ? "standby" :
1899  this_node->details->maintenance ? "maintenance" : "online");
1900 
1901  } else {
1902  crm_trace("%s is offline", pcmk__node_name(this_node));
1903  }
1904 }
1905 
1914 const char *
1915 pe_base_name_end(const char *id)
1916 {
1917  if (!pcmk__str_empty(id)) {
1918  const char *end = id + strlen(id) - 1;
1919 
1920  for (const char *s = end; s > id; --s) {
1921  switch (*s) {
1922  case '0':
1923  case '1':
1924  case '2':
1925  case '3':
1926  case '4':
1927  case '5':
1928  case '6':
1929  case '7':
1930  case '8':
1931  case '9':
1932  break;
1933  case ':':
1934  return (s == end)? s : (s - 1);
1935  default:
1936  return end;
1937  }
1938  }
1939  return end;
1940  }
1941  return NULL;
1942 }
1943 
1954 char *
1955 clone_strip(const char *last_rsc_id)
1956 {
1957  const char *end = pe_base_name_end(last_rsc_id);
1958  char *basename = NULL;
1959 
1960  CRM_ASSERT(end);
1961  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1962  CRM_ASSERT(basename);
1963  return basename;
1964 }
1965 
1976 char *
1977 clone_zero(const char *last_rsc_id)
1978 {
1979  const char *end = pe_base_name_end(last_rsc_id);
1980  size_t base_name_len = end - last_rsc_id + 1;
1981  char *zero = NULL;
1982 
1983  CRM_ASSERT(end);
1984  zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
1985  memcpy(zero, last_rsc_id, base_name_len);
1986  zero[base_name_len] = ':';
1987  zero[base_name_len + 1] = '0';
1988  return zero;
1989 }
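/* Illustrative examples for the three helpers above (resource name
 * hypothetical, assuming a well-formed clone instance name):
 *
 *   pe_base_name_end("myclone:32") -> pointer to the final 'e' of "myclone"
 *   clone_strip("myclone:32")      -> newly allocated "myclone"
 *   clone_zero("myclone:32")       -> newly allocated "myclone:0"
 */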
1990 
1991 static pcmk_resource_t *
1992 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1993                      pcmk_scheduler_t *scheduler)
1994 {
1995  pcmk_resource_t *rsc = NULL;
1996  xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1997 
1998  pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1999  crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
2000  crm_log_xml_debug(xml_rsc, "Orphan resource");
2001 
2002  if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2003  return NULL;
2004  }
2005 
2006  if (xml_contains_remote_node(xml_rsc)) {
2007  pcmk_node_t *node;
2008 
2009  crm_debug("Detected orphaned remote node %s", rsc_id);
2010  node = pcmk_find_node(scheduler, rsc_id);
2011  if (node == NULL) {
2012  node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL,
2013  scheduler);
2014  }
2015  link_rsc2remotenode(scheduler, rsc);
2016 
2017  if (node) {
2018  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2019  node->details->shutdown = TRUE;
2020  }
2021  }
2022 
2023  if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2024  /* This orphaned rsc needs to be mapped to a container. */
2025  crm_trace("Detected orphaned container filler %s", rsc_id);
 2026  pcmk__set_rsc_flags(rsc, pcmk_rsc_removed_filler);
2027  }
 2028  pcmk__set_rsc_flags(rsc, pcmk_rsc_removed);
2029  scheduler->resources = g_list_append(scheduler->resources, rsc);
2030  return rsc;
2031 }
2032 
2044 static pcmk_resource_t *
2045 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2046  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2047 {
 2048  pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2049 
2050  // find_rsc() because we might be a cloned group
2051  pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
 2052  pcmk_rsc_match_clone_only);
2053 
2054  pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2055  top->id, parent->id, rsc_id, pcmk__node_name(node));
2056  return orphan;
2057 }
2058 
2074 static pcmk_resource_t *
2075 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2076  pcmk_resource_t *parent, const char *rsc_id)
2077 {
2078  GList *rIter = NULL;
2079  pcmk_resource_t *rsc = NULL;
2080  pcmk_resource_t *inactive_instance = NULL;
2081  gboolean skip_inactive = FALSE;
2082 
2083  CRM_ASSERT(pcmk__is_anonymous_clone(parent));
2084 
2085  // Check for active (or partially active, for cloned groups) instance
2086  pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2087  rsc_id, pcmk__node_name(node), parent->id);
2088  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2089  GList *locations = NULL;
2090  pcmk_resource_t *child = rIter->data;
2091 
2092  /* Check whether this instance is already known to be active or pending
2093  * anywhere, at this stage of unpacking. Because this function is called
2094  * for a resource before the resource's individual operation history
2095  * entries are unpacked, locations will generally not contain the
2096  * desired node.
2097  *
2098  * However, there are three exceptions:
2099  * (1) when child is a cloned group and we have already unpacked the
2100  * history of another member of the group on the same node;
2101  * (2) when we've already unpacked the history of another numbered
2102  * instance on the same node (which can happen if
2103  * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2104  * (3) when we re-run calculations on the same scheduler data as part of
2105  * a simulation.
2106  */
2107  child->fns->location(child, &locations, 2);
2108  if (locations) {
2109  /* We should never associate the same numbered anonymous clone
2110  * instance with multiple nodes, and clone instances can't migrate,
2111  * so there must be only one location, regardless of history.
2112  */
2113  CRM_LOG_ASSERT(locations->next == NULL);
2114 
2115  if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2116  /* This child instance is active on the requested node, so check
2117  * for a corresponding configured resource. We use find_rsc()
2118  * instead of child because child may be a cloned group, and we
2119  * need the particular member corresponding to rsc_id.
2120  *
2121  * If the history entry is orphaned, rsc will be NULL.
2122  */
2123  rsc = parent->fns->find_rsc(child, rsc_id, NULL,
 2124  pcmk_rsc_match_clone_only);
2125  if (rsc) {
2126  /* If there are multiple instance history entries for an
2127  * anonymous clone in a single node's history (which can
2128  * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2129  * to false), we want to consider the instances beyond the
2130  * first as orphans, even if there are inactive instance
2131  * numbers available.
2132  */
2133  if (rsc->running_on) {
2134  crm_notice("Active (now-)anonymous clone %s has "
2135  "multiple (orphan) instance histories on %s",
2136  parent->id, pcmk__node_name(node));
2137  skip_inactive = TRUE;
2138  rsc = NULL;
2139  } else {
2140  pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2141  }
2142  }
2143  }
2144  g_list_free(locations);
2145 
2146  } else {
2147  pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2148  if (!skip_inactive && !inactive_instance
2149  && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2150  // Remember one inactive instance in case we don't find active
2151  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
 2152  pcmk_rsc_match_clone_only);
2153 
2154  /* ... but don't use it if it was already associated with a
2155  * pending action on another node
2156  */
2157  if ((inactive_instance != NULL) &&
2158  (inactive_instance->pending_node != NULL) &&
2159  !pcmk__same_node(inactive_instance->pending_node, node)) {
2160  inactive_instance = NULL;
2161  }
2162  }
2163  }
2164  }
2165 
2166  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2167  pcmk__rsc_trace(parent, "Resource %s, empty slot",
2168  inactive_instance->id);
2169  rsc = inactive_instance;
2170  }
2171 
2172  /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2173  * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2174  * don't want to consume a valid instance number for unclean nodes. Such
2175  * instances may appear to be active according to the history, but should be
2176  * considered inactive, so we can start an instance elsewhere. Treat such
2177  * instances as orphans.
2178  *
2179  * An exception is instances running on guest nodes -- since guest node
2180  * "fencing" is actually just a resource stop, requires shouldn't apply.
2181  *
2182  * @TODO Ideally, we'd use an inactive instance number if it is not needed
2183  * for any clean instances. However, we don't know that at this point.
2184  */
2185  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2186  && (!node->details->online || node->details->unclean)
2187  && !pcmk__is_guest_or_bundle_node(node)
 2188  && !pe__is_universal_clone(parent, scheduler)) {
2189 
2190  rsc = NULL;
2191  }
2192 
2193  if (rsc == NULL) {
2194  rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2195  pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2196  }
2197  return rsc;
2198 }
2199 
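/* Illustrative annotation (not part of unpack.c): suppose anonymous clone
 * "myclone" had PCMK_META_GLOBALLY_UNIQUE flipped from true to false, leaving
 * both myclone:0 and myclone:1 in one node's history. The first entry
 * unpacked claims the single active instance on that node; when the second is
 * unpacked, rsc->running_on is already set, so skip_inactive is raised and
 * the extra history is mapped to an orphan via create_anonymous_orphan()
 * rather than consuming an idle instance number.
 */
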
2200 static pcmk_resource_t *
2201 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2202  const char *rsc_id)
2203 {
2204  pcmk_resource_t *rsc = NULL;
2205  pcmk_resource_t *parent = NULL;
2206 
2207  crm_trace("looking for %s", rsc_id);
2208  rsc = pe_find_resource(scheduler->resources, rsc_id);
2209 
2210  if (rsc == NULL) {
2211  /* If we didn't find the resource by its name in the operation history,
2212  * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2213  * we create a single :0 orphan to match against here.
2214  */
2215  char *clone0_id = clone_zero(rsc_id);
 2216  pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2217  clone0_id);
2218 
2219  if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2220  rsc = clone0;
2221  parent = uber_parent(clone0);
2222  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2223  } else {
2224  crm_trace("%s is not known as %s either (orphan)",
2225  rsc_id, clone0_id);
2226  }
2227  free(clone0_id);
2228 
2229  } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2230  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2231  rsc_id);
2232  return NULL;
2233 
2234  } else {
2235  parent = uber_parent(rsc);
2236  }
2237 
2238  if (pcmk__is_anonymous_clone(parent)) {
2239 
2240  if (pcmk__is_bundled(parent)) {
2241  rsc = pe__find_bundle_replica(parent->parent, node);
2242  } else {
2243  char *base = clone_strip(rsc_id);
2244 
2245  rsc = find_anonymous_clone(scheduler, node, parent, base);
2246  free(base);
2247  CRM_ASSERT(rsc != NULL);
2248  }
2249  }
2250 
2251  if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2252  && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_none)) {
2253 
2254  pcmk__str_update(&rsc->clone_name, rsc_id);
2255  pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2256  rsc_id, pcmk__node_name(node), rsc->id,
2257  pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
2258  }
2259  return rsc;
2260 }
2261 
2262 static pcmk_resource_t *
2263 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
 2264  pcmk_scheduler_t *scheduler)
2265 {
2266  pcmk_resource_t *rsc = NULL;
2267  const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2268 
2269  crm_debug("Detected orphan resource %s on %s",
2270  rsc_id, pcmk__node_name(node));
2271  rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2272  if (rsc == NULL) {
2273  return NULL;
2274  }
2275 
 2276  if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
 2277  pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2278 
2279  } else {
2280  CRM_CHECK(rsc != NULL, return NULL);
2281  pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
 2282  resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2283  "__orphan_do_not_run__", scheduler);
2284  }
2285  return rsc;
2286 }
2287 
2288 static void
2289 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2290  enum action_fail_response on_fail)
2291 {
2292  pcmk_node_t *tmpnode = NULL;
2293  char *reason = NULL;
2294  enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2295 
2296  CRM_ASSERT(rsc);
2297  pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2298  rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
2299  pcmk_on_fail_text(on_fail));
2300 
2301  /* process current state */
2302  if (rsc->role != pcmk_role_unknown) {
2303  pcmk_resource_t *iter = rsc;
2304 
2305  while (iter) {
2306  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2307  pcmk_node_t *n = pe__copy_node(node);
2308 
2309  pcmk__rsc_trace(rsc, "%s%s%s known on %s",
2310  rsc->id,
2311  ((rsc->clone_name == NULL)? "" : " also known as "),
2312  ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2313  pcmk__node_name(n));
2314  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2315  }
2316  if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2317  break;
2318  }
2319  iter = iter->parent;
2320  }
2321  }
2322 
2323  /* If a managed resource is believed to be running, but node is down ... */
2324  if ((rsc->role > pcmk_role_stopped)
2325  && node->details->online == FALSE
2326  && node->details->maintenance == FALSE
2327  && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2328 
2329  gboolean should_fence = FALSE;
2330 
2331  /* If this is a guest node, fence it (regardless of whether fencing is
2332  * enabled, because guest node fencing is done by recovery of the
2333  * container resource rather than by the fencer). Mark the resource
2334  * we're processing as failed. When the guest comes back up, its
2335  * operation history in the CIB will be cleared, freeing the affected
2336  * resource to run again once we are sure we know its state.
2337  */
2338  if (pcmk__is_guest_or_bundle_node(node)) {
 2339  pe_fence_node(rsc->cluster, node, "guest is unclean", FALSE);
2340  should_fence = TRUE;
2341 
2342  } else if (pcmk_is_set(rsc->cluster->flags,
 2343  pcmk_sched_fencing_enabled)) {
2344  if (pcmk__is_remote_node(node)
2345  && (node->details->remote_rsc != NULL)
2346  && !pcmk_is_set(node->details->remote_rsc->flags,
2347  pcmk_rsc_failed)) {
2348 
2349  /* Setting unseen means that fencing of the remote node will
2350  * occur only if the connection resource is not going to start
2351  * somewhere. This allows connection resources on a failed
2352  * cluster node to move to another node without requiring the
2353  * remote nodes to be fenced as well.
2354  */
2355  node->details->unseen = TRUE;
2356  reason = crm_strdup_printf("%s is active there (fencing will be"
2357  " revoked if remote connection can "
2358  "be re-established elsewhere)",
2359  rsc->id);
2360  }
2361  should_fence = TRUE;
2362  }
2363 
2364  if (should_fence) {
2365  if (reason == NULL) {
2366  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2367  }
2368  pe_fence_node(rsc->cluster, node, reason, FALSE);
2369  }
2370  free(reason);
2371  }
2372 
2373  /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2374  save_on_fail = on_fail;
2375 
2376  if (node->details->unclean) {
2377  /* No extra processing needed
2378  * Also allows resources to be started again after a node is shot
2379  */
2380  on_fail = pcmk_on_fail_ignore;
2381  }
2382 
2383  switch (on_fail) {
2384  case pcmk_on_fail_ignore:
2385  /* nothing to do */
2386  break;
2387 
2388  case pcmk_on_fail_demote:
 2389  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2390  demote_action(rsc, node, FALSE);
2391  break;
2392 
 2393  case pcmk_on_fail_fence_node:
2394  /* treat it as if it is still running
2395  * but also mark the node as unclean
2396  */
2397  reason = crm_strdup_printf("%s failed there", rsc->id);
2398  pe_fence_node(rsc->cluster, node, reason, FALSE);
2399  free(reason);
2400  break;
2401 
 2402  case pcmk_on_fail_standby_node:
2403  node->details->standby = TRUE;
2404  node->details->standby_onfail = TRUE;
2405  break;
2406 
2407  case pcmk_on_fail_block:
2408  /* is_managed == FALSE will prevent any
2409  * actions being sent for the resource
2410  */
 2411  pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
 2412  pcmk__set_rsc_flags(rsc, pcmk_rsc_blocked);
2413  break;
2414 
2415  case pcmk_on_fail_ban:
2416  /* make sure it comes up somewhere else
2417  * or not at all
2418  */
 2419  resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2420  "__action_migration_auto__", rsc->cluster);
2421  break;
2422 
2423  case pcmk_on_fail_stop:
 2424  pe__set_next_role(rsc, pcmk_role_stopped,
 2425  PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2426  break;
2427 
2428  case pcmk_on_fail_restart:
2429  if ((rsc->role != pcmk_role_stopped)
2430  && (rsc->role != pcmk_role_unknown)) {
2431  pcmk__set_rsc_flags(rsc,
 2432  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2433  stop_action(rsc, node, FALSE);
2434  }
2435  break;
2436 
 2437  case pcmk_on_fail_restart_container:
 2438  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2439  if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
2440  /* A bundle's remote connection can run on a different node than
2441  * the bundle's container. We don't necessarily know where the
2442  * container is running yet, so remember it and add a stop
2443  * action for it later.
2444  */
2445  rsc->cluster->stop_needed =
2446  g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2447  } else if (rsc->container) {
2448  stop_action(rsc->container, node, FALSE);
2449  } else if ((rsc->role != pcmk_role_stopped)
2450  && (rsc->role != pcmk_role_unknown)) {
2451  stop_action(rsc, node, FALSE);
2452  }
2453  break;
2454 
 2455  case pcmk_on_fail_reset_remote:
 2456  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 2457  if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
2458  tmpnode = NULL;
2459  if (rsc->is_remote_node) {
2460  tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2461  }
2462  if (pcmk__is_remote_node(tmpnode)
2463  && !(tmpnode->details->remote_was_fenced)) {
2464  /* The remote connection resource failed in a way that
2465  * should result in fencing the remote node.
2466  */
2467  pe_fence_node(rsc->cluster, tmpnode,
2468  "remote connection is unrecoverable", FALSE);
2469  }
2470  }
2471 
 2472  /* Require the stop action whether or not fencing is occurring. */
2473  if (rsc->role > pcmk_role_stopped) {
2474  stop_action(rsc, node, FALSE);
2475  }
2476 
2477  /* if reconnect delay is in use, prevent the connection from exiting the
2478  * "STOPPED" role until the failure is cleared by the delay timeout. */
2479  if (rsc->remote_reconnect_ms) {
2480  pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2481  }
2482  break;
2483  }
2484 
 2485  /* ensure a remote-node connection failure forces an unclean remote-node
 2486  * to be fenced. By setting unseen = FALSE, the remote-node failure will
 2487  * result in a fencing operation regardless of whether we're going to
 2488  * attempt to reconnect to the remote-node in this transition. */
2489  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2490  tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2491  if (tmpnode && tmpnode->details->unclean) {
2492  tmpnode->details->unseen = FALSE;
2493  }
2494  }
2495 
2496  if ((rsc->role != pcmk_role_stopped)
2497  && (rsc->role != pcmk_role_unknown)) {
2498  if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2499  if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2500  crm_notice("Removed resource %s is active on %s and will be "
2501  "stopped when possible",
2502  rsc->id, pcmk__node_name(node));
2503  } else {
2504  crm_notice("Removed resource %s must be stopped manually on %s "
 2505  "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
2506  " is set to false", rsc->id, pcmk__node_name(node));
2507  }
2508  }
2509 
2510  native_add_running(rsc, node, rsc->cluster,
2511  (save_on_fail != pcmk_on_fail_ignore));
2512  switch (on_fail) {
2513  case pcmk_on_fail_ignore:
2514  break;
2515  case pcmk_on_fail_demote:
2516  case pcmk_on_fail_block:
 2517  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2518  break;
2519  default:
2520  pcmk__set_rsc_flags(rsc,
 2521  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2522  break;
2523  }
2524 
2525  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
 2526  /* Only do this for older status sections that included instance numbers.
 2527  * Otherwise stopped instances will appear as orphans.
2528  */
2529  pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
2530  rsc->clone_name, rsc->id);
2531  free(rsc->clone_name);
2532  rsc->clone_name = NULL;
2533 
2534  } else {
2535  GList *possible_matches = pe__resource_actions(rsc, node,
2536  PCMK_ACTION_STOP, FALSE);
2537  GList *gIter = possible_matches;
2538 
2539  for (; gIter != NULL; gIter = gIter->next) {
2540  pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2541 
 2542  pcmk__set_action_flags(stop, pcmk_action_optional);
2543  }
2544 
2545  g_list_free(possible_matches);
2546  }
2547 
 2548  /* A successful stop after migrate_to on the migration source doesn't mean
 2549  * the partially migrated resource is stopped on the migration target.
2550  */
2551  if ((rsc->role == pcmk_role_stopped)
2552  && rsc->partial_migration_source
2553  && rsc->partial_migration_source->details == node->details
2554  && rsc->partial_migration_target
2555  && rsc->running_on) {
2556 
2557  rsc->role = pcmk_role_started;
2558  }
2559 }
2560 
2561 /* create active recurring operations as optional */
2562 static void
2563 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2564  int start_index, int stop_index,
2565  GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2566 {
2567  int counter = -1;
2568  const char *task = NULL;
2569  const char *status = NULL;
2570  GList *gIter = sorted_op_list;
2571 
2572  CRM_ASSERT(rsc);
2573  pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2574  rsc->id, start_index, stop_index);
2575 
2576  for (; gIter != NULL; gIter = gIter->next) {
2577  xmlNode *rsc_op = (xmlNode *) gIter->data;
2578 
2579  guint interval_ms = 0;
2580  char *key = NULL;
2581  const char *id = pcmk__xe_id(rsc_op);
2582 
2583  counter++;
2584 
2585  if (node->details->online == FALSE) {
2586  pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2587  rsc->id, pcmk__node_name(node));
2588  break;
2589 
2590  /* Need to check if there's a monitor for role="Stopped" */
2591  } else if (start_index < stop_index && counter <= stop_index) {
2592  pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2593  id, pcmk__node_name(node));
2594  continue;
2595 
2596  } else if (counter < start_index) {
2597  pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2598  id, pcmk__node_name(node), counter);
2599  continue;
2600  }
2601 
2602  crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2603  if (interval_ms == 0) {
2604  pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2605  id, pcmk__node_name(node));
2606  continue;
2607  }
2608 
2609  status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2610  if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2611  pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2612  id, pcmk__node_name(node));
2613  continue;
2614  }
2615  task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2616  /* create the action */
2617  key = pcmk__op_key(rsc->id, task, interval_ms);
2618  pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2619  custom_action(rsc, key, task, node, TRUE, scheduler);
2620  }
2621 }
2622 
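/* Illustrative annotation (not part of unpack.c): the action keys built by
 * process_recurring() follow the usual <rsc>_<task>_<interval-ms> layout; for
 * example, a 10-second recurring monitor of "myrsc" yields
 *
 *   pcmk__op_key("myrsc", PCMK_ACTION_MONITOR, 10000) -> "myrsc_monitor_10000"
 *
 * and custom_action() re-creates that action as optional, so a recurring
 * monitor that is already active does not by itself force new cluster
 * activity.
 */
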
2623 void
2624 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2625  int *stop_index)
2626 {
2627  int counter = -1;
2628  int implied_monitor_start = -1;
2629  int implied_clone_start = -1;
2630  const char *task = NULL;
2631  const char *status = NULL;
2632 
2633  *stop_index = -1;
2634  *start_index = -1;
2635 
2636  for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2637  const xmlNode *rsc_op = (const xmlNode *) iter->data;
2638 
2639  counter++;
2640 
2641  task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2642  status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2643 
2644  if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2645  && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2646  *stop_index = counter;
2647 
2648  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2649  PCMK_ACTION_MIGRATE_FROM, NULL)) {
2650  *start_index = counter;
2651 
2652  } else if ((implied_monitor_start <= *stop_index)
2653  && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2654  pcmk__str_casei)) {
2655  const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2656 
2657  if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2658  implied_monitor_start = counter;
2659  }
2660  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2661  PCMK_ACTION_DEMOTE, NULL)) {
2662  implied_clone_start = counter;
2663  }
2664  }
2665 
2666  if (*start_index == -1) {
2667  if (implied_clone_start != -1) {
2668  *start_index = implied_clone_start;
2669  } else if (implied_monitor_start != -1) {
2670  *start_index = implied_monitor_start;
2671  }
2672  }
2673 }
2674 
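/* Illustrative annotation (not part of unpack.c): a worked example of the
 * index calculation above. For a call-ID-sorted history
 *
 *   [0] start  [1] monitor (rc=0)  [2] stop (rc=0)  [3] start  [4] monitor
 *
 * the stop at [2] sets *stop_index to 2 and the start at [3] sets
 * *start_index to 3, so only entries [3] and later describe the active
 * instance. The monitors are remembered only as implied starts, which matter
 * just when no explicit start or promote/demote survives in the history.
 */
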
2675 // If resource history entry has shutdown lock, remember lock node and time
2676 static void
2677 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2678  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2679 {
2680  time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2681 
 2682  if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
2683  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2684 
2685  if ((scheduler->shutdown_lock > 0)
 2686  && (get_effective_time(scheduler)
2687  > (lock_time + scheduler->shutdown_lock))) {
2688  pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2689  rsc->id, pcmk__node_name(node));
2690  pe__clear_resource_history(rsc, node);
2691  } else {
2692  /* @COMPAT I don't like breaking const signatures, but
2693  * rsc->lock_node should really be const -- we just can't change it
2694  * until the next API compatibility break.
2695  */
2696  rsc->lock_node = (pcmk_node_t *) node;
2697  rsc->lock_time = lock_time;
2698  }
2699  }
2700 }
2701 
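/* Illustrative annotation (not part of unpack.c): the expiry test above is
 * plain epoch arithmetic. With PCMK_OPT_SHUTDOWN_LOCK_LIMIT set to 600
 * seconds (scheduler->shutdown_lock == 600) and a lock recorded at
 * lock_time == 1700000000, the lock is honored until 1700000600; once the
 * effective time passes that, the resource's history on the node is cleared
 * and the lock is dropped.
 */
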
2712 static pcmk_resource_t *
2713 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
 2714  pcmk_scheduler_t *scheduler)
2715 {
2716  GList *gIter = NULL;
2717  int stop_index = -1;
2718  int start_index = -1;
2719  enum rsc_role_e req_role = pcmk_role_unknown;
2720 
2721  const char *rsc_id = pcmk__xe_id(lrm_resource);
2722 
2723  pcmk_resource_t *rsc = NULL;
2724  GList *op_list = NULL;
2725  GList *sorted_op_list = NULL;
2726 
2727  xmlNode *rsc_op = NULL;
2728  xmlNode *last_failure = NULL;
2729 
 2730  enum action_fail_response on_fail = pcmk_on_fail_ignore;
2731  enum rsc_role_e saved_role = pcmk_role_unknown;
2732 
2733  if (rsc_id == NULL) {
2734  pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2735  " entry: No " PCMK_XA_ID);
2736  crm_log_xml_info(lrm_resource, "missing-id");
2737  return NULL;
2738  }
2739  crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2740  rsc_id, pcmk__node_name(node));
2741 
2742  /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2743  * them
2744  */
2745  for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2746  NULL);
2747  rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
2748 
2749  op_list = g_list_prepend(op_list, rsc_op);
2750  }
2751 
 2752  if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2753  if (op_list == NULL) {
2754  // If there are no operations, there is nothing to do
2755  return NULL;
2756  }
2757  }
2758 
2759  /* find the resource */
2760  rsc = unpack_find_resource(scheduler, node, rsc_id);
2761  if (rsc == NULL) {
2762  if (op_list == NULL) {
2763  // If there are no operations, there is nothing to do
2764  return NULL;
2765  } else {
2766  rsc = process_orphan_resource(lrm_resource, node, scheduler);
2767  }
2768  }
2769  CRM_ASSERT(rsc != NULL);
2770 
2771  // Check whether the resource is "shutdown-locked" to this node
 2772  if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2773  unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2774  }
2775 
2776  /* process operations */
2777  saved_role = rsc->role;
2778  rsc->role = pcmk_role_unknown;
2779  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2780 
2781  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2782  xmlNode *rsc_op = (xmlNode *) gIter->data;
2783 
2784  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2785  }
2786 
2787  /* create active recurring operations as optional */
2788  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2789  process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2790  scheduler);
2791 
2792  /* no need to free the contents */
2793  g_list_free(sorted_op_list);
2794 
2795  process_rsc_state(rsc, node, on_fail);
2796 
2797  if (get_target_role(rsc, &req_role)) {
2798  if ((rsc->next_role == pcmk_role_unknown)
2799  || (req_role < rsc->next_role)) {
2800 
2801  pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2802 
2803  } else if (req_role > rsc->next_role) {
2804  pcmk__rsc_info(rsc,
2805  "%s: Not overwriting calculated next role %s"
2806  " with requested next role %s",
2807  rsc->id, pcmk_role_text(rsc->next_role),
2808  pcmk_role_text(req_role));
2809  }
2810  }
2811 
2812  if (saved_role > rsc->role) {
2813  rsc->role = saved_role;
2814  }
2815 
2816  return rsc;
2817 }
2818 
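/* Illustrative annotation (not part of unpack.c): the per-resource unpacking
 * above reduces to
 *
 *   collect lrm_rsc_op entries -> sort by call ID (sort_op_by_callid)
 *   -> unpack_rsc_op() each    -> calculate_active_ops()
 *   -> process_recurring()     -> process_rsc_state()
 *
 * with PCMK_META_TARGET_ROLE applied last, and only where it lowers (never
 * raises) the next role already calculated from the history.
 */
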
2819 static void
2820 handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
 2821  pcmk_scheduler_t *scheduler)
2822 {
2823  for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
2824  NULL, NULL);
2825  rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2826 
2827  pcmk_resource_t *rsc;
2828  pcmk_resource_t *container;
2829  const char *rsc_id;
2830  const char *container_id;
2831 
2832  if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2833  continue;
2834  }
2835 
2836  container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2837  rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2838  if (container_id == NULL || rsc_id == NULL) {
2839  continue;
2840  }
2841 
2842  container = pe_find_resource(scheduler->resources, container_id);
2843  if (container == NULL) {
2844  continue;
2845  }
2846 
2847  rsc = pe_find_resource(scheduler->resources, rsc_id);
2848  if ((rsc == NULL) || (rsc->container != NULL)
 2849  || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2850  continue;
2851  }
2852 
2853  pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2854  rsc->id, container_id);
2855  rsc->container = container;
2856  container->fillers = g_list_append(container->fillers, rsc);
2857  }
2858 }
2859 
2868 static void
2869 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
 2870  pcmk_scheduler_t *scheduler)
2871 {
2872  bool found_orphaned_container_filler = false;
2873 
2874  // Drill down to PCMK__XE_LRM_RESOURCES section
2875  xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2876  if (xml == NULL) {
2877  return;
2878  }
2879  xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2880  if (xml == NULL) {
2881  return;
2882  }
2883 
2884  // Unpack each PCMK__XE_LRM_RESOURCE entry
2885  for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
 2886  PCMK__XE_LRM_RESOURCE,
2887  NULL, NULL);
2888  rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
2889 
2890  pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2891 
2892  if ((rsc != NULL)
 2893  && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2894  found_orphaned_container_filler = true;
2895  }
2896  }
2897 
2898  /* Now that all resource state has been unpacked for this node, map any
2899  * orphaned container fillers to their container resource.
2900  */
2901  if (found_orphaned_container_filler) {
2902  handle_orphaned_container_fillers(xml, scheduler);
2903  }
2904 }
2905 
2906 static void
2907 set_active(pcmk_resource_t *rsc)
2908 {
2909  const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2910 
2911  if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2912  rsc->role = pcmk_role_unpromoted;
2913  } else {
2914  rsc->role = pcmk_role_started;
2915  }
2916 }
2917 
2918 static void
2919 set_node_score(gpointer key, gpointer value, gpointer user_data)
2920 {
2921  pcmk_node_t *node = value;
2922  int *score = user_data;
2923 
2924  node->weight = *score;
2925 }
2926 
2927 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2928  "/" PCMK__XE_NODE_STATE
2929 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
2930  "/" PCMK__XE_LRM_RESOURCES \
2931  "/" PCMK__XE_LRM_RESOURCE
2932 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2933 
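/* Illustrative annotation (not part of unpack.c): for resource "myrsc" on
 * node "node1", the macros above compose into an XPath such as
 *
 *   /cib/status/node_state[@uname='node1']
 *       /lrm/lrm_resources/lrm_resource[@id='myrsc']
 *       /lrm_rsc_op[@operation='monitor']
 *
 * (wrapped here for readability; the generated string has no whitespace),
 * which find_lrm_op() below narrows further with migration and result-code
 * predicates.
 */
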
2934 static xmlNode *
2935 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2936  int target_rc, pcmk_scheduler_t *scheduler)
2937 {
2938  GString *xpath = NULL;
2939  xmlNode *xml = NULL;
2940 
2941  CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2942  return NULL);
2943 
2944  xpath = g_string_sized_new(256);
2945  pcmk__g_strcat(xpath,
2946  XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2947  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2948  SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2949  NULL);
2950 
2951  /* Need to check against transition_magic too? */
2952  if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2953  pcmk__g_strcat(xpath,
2954  " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2955  NULL);
2956 
2957  } else if ((source != NULL)
2958  && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2959  pcmk__g_strcat(xpath,
2960  " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2961  NULL);
2962  } else {
2963  g_string_append_c(xpath, ']');
2964  }
2965 
2966  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2967  LOG_DEBUG);
2968  g_string_free(xpath, TRUE);
2969 
2970  if (xml && target_rc >= 0) {
2971  int rc = PCMK_OCF_UNKNOWN_ERROR;
2972  int status = PCMK_EXEC_ERROR;
2973 
 2974  crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
 2975  crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
2976  if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2977  return NULL;
2978  }
2979  }
2980  return xml;
2981 }
2982 
2983 static xmlNode *
2984 find_lrm_resource(const char *rsc_id, const char *node_name,
 2985  pcmk_scheduler_t *scheduler)
2986 {
2987  GString *xpath = NULL;
2988  xmlNode *xml = NULL;
2989 
2990  CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2991 
2992  xpath = g_string_sized_new(256);
2993  pcmk__g_strcat(xpath,
2994  XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
2995  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
2996  NULL);
2997 
2998  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2999  LOG_DEBUG);
3000 
3001  g_string_free(xpath, TRUE);
3002  return xml;
3003 }
3004 
3014 static bool
3015 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3016 {
3017  bool result = false;
3018  xmlXPathObjectPtr search;
3019  char *xpath = NULL;
3020 
3021  xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3022  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
 3023  SUB_XPATH_LRM_RSC_OP
3024  "[@" PCMK__XA_RC_CODE "!='%d']",
3025  node_name, rsc->id, PCMK_OCF_UNKNOWN);
3026 
3027  search = xpath_search(rsc->cluster->input, xpath);
3028  result = (numXpathResults(search) == 0);
3029  freeXpathObject(search);
3030  free(xpath);
3031  return result;
3032 }
3033 
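/* Illustrative annotation (not part of unpack.c): pending operations are
 * recorded in the CIB with PCMK_OCF_UNKNOWN (193) as their rc-code, so the
 * XPath above counts only completed results; a node "knows" a resource's
 * state once any operation there has recorded some other rc-code.
 */
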
3046 static bool
3047 monitor_not_running_after(const char *rsc_id, const char *node_name,
3048  const xmlNode *xml_op, bool same_node,
 3049  pcmk_scheduler_t *scheduler)
3050 {
3051  /* Any probe/monitor operation on the node indicating it was not running
3052  * there
3053  */
3054  xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
 3055  NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3056 
3057  return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
3058 }
3059 
3072 static bool
3073 non_monitor_after(const char *rsc_id, const char *node_name,
3074  const xmlNode *xml_op, bool same_node,
 3075  pcmk_scheduler_t *scheduler)
3076 {
3077  xmlNode *lrm_resource = NULL;
3078 
3079  lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3080  if (lrm_resource == NULL) {
3081  return false;
3082  }
3083 
3084  for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3085  NULL, NULL);
3086  op != NULL; op = pcmk__xe_next_same(op)) {
3087 
3088  const char * task = NULL;
3089 
3090  if (op == xml_op) {
3091  continue;
3092  }
3093 
 3094  task = crm_element_value(op, PCMK_XA_OPERATION);
3095 
 3096  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
 3097  PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3098  NULL)
3099  && pe__is_newer_op(op, xml_op, same_node) > 0) {
3100  return true;
3101  }
3102  }
3103 
3104  return false;
3105 }
3106 
3119 static bool
3120 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3121  const xmlNode *migrate_to,
3122  const xmlNode *migrate_from,
 3123  pcmk_scheduler_t *scheduler)
3124 {
3125  const xmlNode *xml_op = migrate_to;
3126  const char *source = NULL;
3127  const char *target = NULL;
3128  bool same_node = false;
3129 
3130  if (migrate_from) {
3131  xml_op = migrate_from;
3132  }
3133 
3134  source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
 3135  target = crm_element_value(xml_op, PCMK__META_MIGRATE_TARGET);
3136 
 3137  /* It's preferable to compare to the migration event on the same node,
 3138  * if one exists, since call IDs are more reliable.
3139  */
3140  if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3141  if (migrate_from) {
3142  xml_op = migrate_from;
3143  same_node = true;
3144 
3145  } else {
3146  xml_op = migrate_to;
3147  }
3148 
3149  } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3150  if (migrate_to) {
3151  xml_op = migrate_to;
3152  same_node = true;
3153 
3154  } else {
3155  xml_op = migrate_from;
3156  }
3157  }
3158 
3159  /* If there's any newer non-monitor operation on the node, or any newer
3160  * probe/monitor operation on the node indicating it was not running there,
3161  * the migration events potentially no longer matter for the node.
3162  */
3163  return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3164  || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3165  scheduler);
3166 }
3167 
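/* Illustrative annotation (not part of unpack.c): an example of the
 * relevance test above. If "myrsc" recorded a migrate_to from node1 to node2,
 * and node2 later recorded a probe with rc=7 (PCMK_OCF_NOT_RUNNING) and a
 * higher call ID, then monitor_not_running_after() returns true and
 * newer_state_after_migrate() reports that the migration events no longer
 * describe where myrsc is actually running.
 */
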
3180 static int
3181 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3182  const pcmk_node_t *target_node,
3183  const char **source_name, const char **target_name)
3184 {
3185  *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3186  *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3187  if ((*source_name == NULL) || (*target_name == NULL)) {
3188  pcmk__config_err("Ignoring resource history entry %s without "
 3189  PCMK__META_MIGRATE_SOURCE " and "
3190  PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3191  return pcmk_rc_unpack_error;
3192  }
3193 
3194  if ((source_node != NULL)
3195  && !pcmk__str_eq(*source_name, source_node->details->uname,
 3196  pcmk__str_casei|pcmk__str_null_matches)) {
3197  pcmk__config_err("Ignoring resource history entry %s because "
3198  PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3199  pcmk__xe_id(entry), *source_name,
3200  pcmk__node_name(source_node));
3201  return pcmk_rc_unpack_error;
3202  }
3203 
3204  if ((target_node != NULL)
3205  && !pcmk__str_eq(*target_name, target_node->details->uname,
 3206  pcmk__str_casei|pcmk__str_null_matches)) {
3207  pcmk__config_err("Ignoring resource history entry %s because "
3208  PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3209  pcmk__xe_id(entry), *target_name,
3210  pcmk__node_name(target_node));
3211  return pcmk_rc_unpack_error;
3212  }
3213 
3214  return pcmk_rc_ok;
3215 }
3216 
 3217 /*!
3218  * \internal
3219  * \brief Add a migration source to a resource's list of dangling migrations
3220  *
3221  * If the migrate_to and migrate_from actions in a live migration both
3222  * succeeded, but there is no stop on the source, the migration is considered
3223  * "dangling." Add the source to the resource's dangling migration list, which
3224  * will be used to schedule a stop on the source without affecting the target.
3225  *
3226  * \param[in,out] rsc Resource involved in migration
3227  * \param[in] node Migration source
3228  */
3229 static void
3230 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3231 {
3232  pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3233  rsc->id, pcmk__node_name(node));
3234  rsc->role = pcmk_role_stopped;
3235  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3236  (gpointer) node);
3237 }
3238 
3245 static void
3246 unpack_migrate_to_success(struct action_history *history)
3247 {
3248  /* A complete migration sequence is:
3249  * 1. migrate_to on source node (which succeeded if we get to this function)
3250  * 2. migrate_from on target node
3251  * 3. stop on source node
3252  *
3253  * If no migrate_from has happened, the migration is considered to be
3254  * "partial". If the migrate_from succeeded but no stop has happened, the
3255  * migration is considered to be "dangling".
3256  *
3257  * If a successful migrate_to and stop have happened on the source node, we
3258  * still need to check for a partial migration, due to scenarios (easier to
3259  * produce with batch-limit=1) like:
3260  *
3261  * - A resource is migrating from node1 to node2, and a migrate_to is
3262  * initiated for it on node1.
3263  *
3264  * - node2 goes into standby mode while the migrate_to is pending, which
3265  * aborts the transition.
3266  *
3267  * - Upon completion of the migrate_to, a new transition schedules a stop
3268  * on both nodes and a start on node1.
3269  *
3270  * - If the new transition is aborted for any reason while the resource is
3271  * stopping on node1, the transition after that stop completes will see
3272  * the migrate_to and stop on the source, but it's still a partial
3273  * migration, and the resource must be stopped on node2 because it is
3274  * potentially active there due to the migrate_to.
3275  *
3276  * We also need to take into account that either node's history may be
3277  * cleared at any point in the migration process.
3278  */
3279  int from_rc = PCMK_OCF_OK;
3280  int from_status = PCMK_EXEC_PENDING;
3281  pcmk_node_t *target_node = NULL;
3282  xmlNode *migrate_from = NULL;
3283  const char *source = NULL;
3284  const char *target = NULL;
3285  bool source_newer_op = false;
3286  bool target_newer_state = false;
3287  bool active_on_target = false;
3288 
3289  // Get source and target node names from XML
3290  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3291  &target) != pcmk_rc_ok) {
3292  return;
3293  }
3294 
3295  // Check for newer state on the source
3296  source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3297  true, history->rsc->cluster);
3298 
3299  // Check for a migrate_from action from this source on the target
3300  migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3301  target, source, -1, history->rsc->cluster);
3302  if (migrate_from != NULL) {
3303  if (source_newer_op) {
3304  /* There's a newer non-monitor operation on the source and a
3305  * migrate_from on the target, so this migrate_to is irrelevant to
3306  * the resource's state.
3307  */
3308  return;
3309  }
3310  crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3311  crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3312  }
3313 
3314  /* If the resource has newer state on both the source and target after the
3315  * migration events, this migrate_to is irrelevant to the resource's state.
3316  */
3317  target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3318  history->xml, migrate_from,
3319  history->rsc->cluster);
3320  if (source_newer_op && target_newer_state) {
3321  return;
3322  }
3323 
3324  /* Check for dangling migration (migrate_from succeeded but stop not done).
3325  * We know there's no stop because we already returned if the target has a
3326  * migrate_from and the source has any newer non-monitor operation.
3327  */
3328  if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3329  add_dangling_migration(history->rsc, history->node);
3330  return;
3331  }
3332 
3333  /* Without newer state, this migrate_to implies the resource is active.
3334  * (Clones are not allowed to migrate, so role can't be promoted.)
3335  */
3336  history->rsc->role = pcmk_role_started;
3337 
3338  target_node = pcmk_find_node(history->rsc->cluster, target);
3339  active_on_target = !target_newer_state && (target_node != NULL)
3340  && target_node->details->online;
3341 
3342  if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3343  if (active_on_target) {
3344  native_add_running(history->rsc, target_node, history->rsc->cluster,
3345  TRUE);
3346  } else {
3347  // Mark resource as failed, require recovery, and prevent migration
3348  pcmk__set_rsc_flags(history->rsc,
 3349  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 3350  pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
3351  }
3352  return;
3353  }
3354 
3355  // The migrate_from is pending, complete but erased, or to be scheduled
3356 
3357  /* If there is no history at all for the resource on an online target, then
3358  * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3359  * have the probe result, it will be reflected in target_newer_state.
3360  */
3361  if ((target_node != NULL) && target_node->details->online
3362  && unknown_on_node(history->rsc, target)) {
3363  return;
3364  }
3365 
3366  if (active_on_target) {
3367  pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3368  source);
3369 
3370  native_add_running(history->rsc, target_node, history->rsc->cluster,
3371  FALSE);
3372  if ((source_node != NULL) && source_node->details->online) {
3373  /* This is a partial migration: the migrate_to completed
3374  * successfully on the source, but the migrate_from has not
3375  * completed. Remember the source and target; if the newly
3376  * chosen target remains the same when we schedule actions
3377  * later, we may continue with the migration.
3378  */
3379  history->rsc->partial_migration_target = target_node;
3380  history->rsc->partial_migration_source = source_node;
3381  }
3382 
3383  } else if (!source_newer_op) {
3384  // Mark resource as failed, require recovery, and prevent migration
3385  pcmk__set_rsc_flags(history->rsc,
 3386  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
 3387  pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
3388  }
3389 }
3390 
3397 static void
3398 unpack_migrate_to_failure(struct action_history *history)
3399 {
3400  xmlNode *target_migrate_from = NULL;
3401  const char *source = NULL;
3402  const char *target = NULL;
3403 
3404  // Get source and target node names from XML
3405  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3406  &target) != pcmk_rc_ok) {
3407  return;
3408  }
3409 
3410  /* If a migration failed, we have to assume the resource is active. Clones
3411  * are not allowed to migrate, so role can't be promoted.
3412  */
3413  history->rsc->role = pcmk_role_started;
3414 
3415  // Check for migrate_from on the target
3416  target_migrate_from = find_lrm_op(history->rsc->id,
 3417  PCMK_ACTION_MIGRATE_FROM, target, source,
3418  PCMK_OCF_OK, history->rsc->cluster);
3419 
3420  if (/* If the resource state is unknown on the target, it will likely be
3421  * probed there.
3422  * Don't just consider it running there. We will get back here anyway in
3423  * case the probe detects it's running there.
3424  */
3425  !unknown_on_node(history->rsc, target)
3426  /* If the resource has newer state on the target after the migration
3427  * events, this migrate_to no longer matters for the target.
3428  */
3429  && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3430  target_migrate_from,
3431  history->rsc->cluster)) {
3432  /* The resource has no newer state on the target, so assume it's still
3433  * active there.
 3434  * (if the node is up).
3435  */
3436  pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
3437  target);
3438 
3439  if (target_node && target_node->details->online) {
3440  native_add_running(history->rsc, target_node, history->rsc->cluster,
3441  FALSE);
3442  }
3443 
3444  } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3445  history->rsc->cluster)) {
3446  /* We know the resource has newer state on the target, but this
3447  * migrate_to still matters for the source as long as there's no newer
3448  * non-monitor operation there.
3449  */
3450 
3451  // Mark node as having dangling migration so we can force a stop later
3452  history->rsc->dangling_migrations =
3453  g_list_prepend(history->rsc->dangling_migrations,
3454  (gpointer) history->node);
3455  }
3456 }
3457 
3464 static void
3465 unpack_migrate_from_failure(struct action_history *history)
3466 {
3467  xmlNode *source_migrate_to = NULL;
3468  const char *source = NULL;
3469  const char *target = NULL;
3470 
3471  // Get source and target node names from XML
3472  if (get_migration_node_names(history->xml, NULL, history->node, &source,
3473  &target) != pcmk_rc_ok) {
3474  return;
3475  }
3476 
3477  /* If a migration failed, we have to assume the resource is active. Clones
3478  * are not allowed to migrate, so role can't be promoted.
3479  */
3480  history->rsc->role = pcmk_role_started;
3481 
3482  // Check for a migrate_to on the source
3483  source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3484  source, target, PCMK_OCF_OK,
3485  history->rsc->cluster);
3486 
3487  if (/* If the resource state is unknown on the source, it will likely be
3488  * probed there.
3489  * Don't just consider it running there. We will get back here anyway in
3490  * case the probe detects it's running there.
3491  */
3492  !unknown_on_node(history->rsc, source)
3493  /* If the resource has newer state on the source after the migration
3494  * events, this migrate_from no longer matters for the source.
3495  */
3496  && !newer_state_after_migrate(history->rsc->id, source,
3497  source_migrate_to, history->xml,
3498  history->rsc->cluster)) {
3499  /* The resource has no newer state on the source, so assume it's still
3500  * active there (if it is up).
3501  */
3502  pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3503  source);
3504 
3505  if (source_node && source_node->details->online) {
3506  native_add_running(history->rsc, source_node, history->rsc->cluster,
3507  TRUE);
3508  }
3509  }
3510 }
3511 
3518 static void
3519 record_failed_op(struct action_history *history)
3520 {
3521  if (!(history->node->details->online)) {
3522  return;
3523  }
3524 
3525  for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3526  xIter != NULL; xIter = xIter->next) {
3527 
3528  const char *key = pcmk__xe_history_key(xIter);
3529  const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3530 
3531  if (pcmk__str_eq(history->key, key, pcmk__str_none)
3532  && pcmk__str_eq(uname, history->node->details->uname,
3533  pcmk__str_casei)) {
3534  crm_trace("Skipping duplicate entry %s on %s",
3535  history->key, pcmk__node_name(history->node));
3536  return;
3537  }
3538  }
3539 
3540  crm_trace("Adding entry for %s on %s to failed action list",
3541  history->key, pcmk__node_name(history->node));
3542  crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
3543  crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3544  pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
3545 }
3546 
3547 static char *
3548 last_change_str(const xmlNode *xml_op)
3549 {
3550  time_t when;
3551  char *result = NULL;
3552 
 3553  if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
3554  &when) == pcmk_ok) {
3555  char *when_s = pcmk__epoch2str(&when, 0);
3556  const char *p = strchr(when_s, ' ');
3557 
3558  // Skip day of week to make message shorter
3559  if ((p != NULL) && (*(++p) != '\0')) {
3560  result = pcmk__str_copy(p);
3561  }
3562  free(when_s);
3563  }
3564 
3565  if (result == NULL) {
3566  result = pcmk__str_copy("unknown_time");
3567  }
3568 
3569  return result;
3570 }
3571 
3584 static int
3585 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3586 {
3587  switch (first) {
3588  case pcmk_on_fail_demote:
3589  switch (second) {
3590  case pcmk_on_fail_ignore:
3591  return 1;
3592  case pcmk_on_fail_demote:
3593  return 0;
3594  default:
3595  return -1;
3596  }
3597  break;
3598 
 3599  case pcmk_on_fail_reset_remote:
3600  switch (second) {
3601  case pcmk_on_fail_ignore:
3602  case pcmk_on_fail_demote:
3603  case pcmk_on_fail_restart:
3604  return 1;
 3605  case pcmk_on_fail_reset_remote:
3606  return 0;
3607  default:
3608  return -1;
3609  }
3610  break;
3611 
 3612  case pcmk_on_fail_restart_container:
3613  switch (second) {
3614  case pcmk_on_fail_ignore:
3615  case pcmk_on_fail_demote:
3616  case pcmk_on_fail_restart:
 3617  case pcmk_on_fail_reset_remote:
3618  return 1;
 3619  case pcmk_on_fail_restart_container:
3620  return 0;
3621  default:
3622  return -1;
3623  }
3624  break;
3625 
3626  default:
3627  break;
3628  }
3629  switch (second) {
3630  case pcmk_on_fail_demote:
3631  return (first == pcmk_on_fail_ignore)? -1 : 1;
3632 
 3633  case pcmk_on_fail_reset_remote:
3634  switch (first) {
3635  case pcmk_on_fail_ignore:
3636  case pcmk_on_fail_demote:
3637  case pcmk_on_fail_restart:
3638  return -1;
3639  default:
3640  return 1;
3641  }
3642  break;
3643 
 3644  case pcmk_on_fail_restart_container:
3645  switch (first) {
3646  case pcmk_on_fail_ignore:
3647  case pcmk_on_fail_demote:
3648  case pcmk_on_fail_restart:
 3649  case pcmk_on_fail_reset_remote:
3650  return -1;
3651  default:
3652  return 1;
3653  }
3654  break;
3655 
3656  default:
3657  break;
3658  }
3659  return first - second;
3660 }
3661 
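/* Illustrative annotation (not part of unpack.c): some orderings the
 * comparison above produces, for instance
 *
 *   cmp_on_fail(pcmk_on_fail_ignore, pcmk_on_fail_demote) < 0
 *   cmp_on_fail(pcmk_on_fail_fence_node, pcmk_on_fail_restart) > 0
 *
 * so when several failed operations on one node disagree, the caller keeps
 * the more severe handling (here, fencing).
 */
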
3668 static void
3669 ban_from_all_nodes(pcmk_resource_t *rsc)
3670 {
3671  int score = -PCMK_SCORE_INFINITY;
3672  pcmk_resource_t *fail_rsc = rsc;
3673 
3674  if (fail_rsc->parent != NULL) {
3675  pcmk_resource_t *parent = uber_parent(fail_rsc);
3676 
3677  if (pcmk__is_anonymous_clone(parent)) {
3678  /* For anonymous clones, if an operation with
3679  * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3680  * entire clone must stop.
3681  */
3682  fail_rsc = parent;
3683  }
3684  }
3685 
3686  // Ban the resource from all nodes
3687  crm_notice("%s will not be started under current conditions", fail_rsc->id);
3688  if (fail_rsc->allowed_nodes != NULL) {
3689  g_hash_table_destroy(fail_rsc->allowed_nodes);
3690  }
3691  fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3692  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3693 }
3694 
3703 static void
3704 unpack_failure_handling(struct action_history *history,
3705  enum action_fail_response *on_fail,
3706  enum rsc_role_e *fail_role)
3707 {
3708  xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3709  history->interval_ms, true);
3710 
3711  GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3712  history->task,
3713  history->interval_ms, config);
3714 
3715  const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3716 
3717  *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3718  history->interval_ms, on_fail_str);
3719  *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3720  meta);
3721  g_hash_table_destroy(meta);
3722 }
3723 
3734 static void
3735 unpack_rsc_op_failure(struct action_history *history,
3736  enum action_fail_response config_on_fail,
3737  enum rsc_role_e fail_role, xmlNode **last_failure,
3738  enum action_fail_response *on_fail)
3739 {
3740  bool is_probe = false;
3741  char *last_change_s = NULL;
3742 
3743  *last_failure = history->xml;
3744 
3745  is_probe = pcmk_xe_is_probe(history->xml);
3746  last_change_s = last_change_str(history->xml);
3747 
3748  if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3749  && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3750  crm_trace("Unexpected result (%s%s%s) was recorded for "
3751  "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3752  services_ocf_exitcode_str(history->exit_status),
3753  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3754  pcmk__s(history->exit_reason, ""),
3755  (is_probe? "probe" : history->task), history->rsc->id,
3756  pcmk__node_name(history->node), last_change_s,
3757  history->exit_status, history->id);
3758  } else {
3759  pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
3760  "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3761  services_ocf_exitcode_str(history->exit_status),
3762  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3763  pcmk__s(history->exit_reason, ""),
3764  (is_probe? "probe" : history->task), history->rsc->id,
3765  pcmk__node_name(history->node), last_change_s,
3766  history->exit_status, history->id);
3767 
3768  if (is_probe && (history->exit_status != PCMK_OCF_OK)
3769  && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3770  && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3771 
3772  /* A failed (not just unexpected) probe result could mean the user
3773  * didn't know resources will be probed even where they can't run.
3774  */
3775  crm_notice("If it is not possible for %s to run on %s, see "
3776  "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3777  "constraints",
3778  history->rsc->id, pcmk__node_name(history->node));
3779  }
3780 
3781  record_failed_op(history);
3782  }
3783 
3784  free(last_change_s);
3785 
3786  if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3787  pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3788  pcmk_on_fail_text(*on_fail),
3789  pcmk_on_fail_text(config_on_fail), history->key);
3790  *on_fail = config_on_fail;
3791  }
3792 
3793  if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3794  resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3795  "__stop_fail__", history->rsc->cluster);
3796 
3797  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3798  unpack_migrate_to_failure(history);
3799 
3800  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3801  unpack_migrate_from_failure(history);
3802 
3803  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3804  history->rsc->role = pcmk_role_promoted;
3805 
3806  } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3807  if (config_on_fail == pcmk_on_fail_block) {
3808  history->rsc->role = pcmk_role_promoted;
3809  pe__set_next_role(history->rsc, pcmk_role_stopped,
3810  "demote with " PCMK_META_ON_FAIL "=block");
3811 
3812  } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3813  history->rsc->role = pcmk_role_stopped;
3814 
3815  } else {
3816  /* Staying in the promoted role would put the scheduler and
3817  * controller into a loop. Setting the role to unpromoted is not
3818  * dangerous because the resource will be stopped as part of
3819  * recovery, and any promotion will be ordered after that stop.
3820  */
3821  history->rsc->role = pcmk_role_unpromoted;
3822  }
3823  }
3824 
3825  if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3826  /* leave stopped */
3827  pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3828  history->rsc->role = pcmk_role_stopped;
3829 
3830  } else if (history->rsc->role < pcmk_role_started) {
3831  pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3832  set_active(history->rsc);
3833  }
3834 
3835  pcmk__rsc_trace(history->rsc,
3836  "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3837  history->rsc->id, pcmk_role_text(history->rsc->role),
3838  pcmk__btoa(history->node->details->unclean),
3839  pcmk_on_fail_text(config_on_fail),
3840  pcmk_role_text(fail_role));
3841 
3842  if ((fail_role != pcmk_role_started)
3843  && (history->rsc->next_role < fail_role)) {
3844  pe__set_next_role(history->rsc, fail_role, "failure");
3845  }
3846 
3847  if (fail_role == pcmk_role_stopped) {
3848  ban_from_all_nodes(history->rsc);
3849  }
3850 }
3851 
3861 static void
3862 block_if_unrecoverable(struct action_history *history)
3863 {
3864  char *last_change_s = NULL;
3865 
3866  if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3867  return; // All actions besides stop are always recoverable
3868  }
3869  if (pe_can_fence(history->node->details->data_set, history->node)) {
3870  return; // Failed stops are recoverable via fencing
3871  }
3872 
3873  last_change_s = last_change_str(history->xml);
3874  pcmk__sched_err("No further recovery can be attempted for %s "
3875  "because %s on %s failed (%s%s%s) at %s "
3876  CRM_XS " rc=%d id=%s",
3877  history->rsc->id, history->task,
3878  pcmk__node_name(history->node),
3879  services_ocf_exitcode_str(history->exit_status),
3880  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3881  pcmk__s(history->exit_reason, ""),
3882  last_change_s, history->exit_status, history->id);
3883 
3884  free(last_change_s);
3885 
3886  pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_managed);
3887  pcmk__set_rsc_flags(history->rsc, pcmk_rsc_blocked);
3888 }
3889 
3899 static inline void
3900 remap_because(struct action_history *history, const char **why, int value,
3901  const char *reason)
3902 {
3903  if (history->execution_status != value) {
3904  history->execution_status = value;
3905  *why = reason;
3906  }
3907 }
3908 
3931 static void
3932 remap_operation(struct action_history *history,
3933  enum action_fail_response *on_fail, bool expired)
3934 {
3935  bool is_probe = false;
3936  int orig_exit_status = history->exit_status;
3937  int orig_exec_status = history->execution_status;
3938  const char *why = NULL;
3939  const char *task = history->task;
3940 
3941  // Remap degraded results to their successful counterparts
3942  history->exit_status = pcmk__effective_rc(history->exit_status);
3943  if (history->exit_status != orig_exit_status) {
3944  why = "degraded result";
3945  if (!expired && (!history->node->details->shutdown
3946  || history->node->details->online)) {
3947  record_failed_op(history);
3948  }
3949  }
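 /* Editorial note: pcmk__effective_rc() maps the degraded exit codes to
  * their healthy counterparts -- PCMK_OCF_DEGRADED to PCMK_OCF_OK, and
  * PCMK_OCF_DEGRADED_PROMOTED to PCMK_OCF_RUNNING_PROMOTED -- so a degraded
  * result is scheduled as a success while still being recorded as a failed
  * operation above.
  */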
3950 
3951  if (!pcmk__is_bundled(history->rsc)
3952  && pcmk_xe_mask_probe_failure(history->xml)
3953  && ((history->execution_status != PCMK_EXEC_DONE)
3954  || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3955  history->execution_status = PCMK_EXEC_DONE;
3956  history->exit_status = PCMK_OCF_NOT_RUNNING;
3957  why = "equivalent probe result";
3958  }
3959 
3960  /* If the executor reported an execution status of anything but done or
3961  * error, consider that final. But for done or error, we know better whether
3962  * it should be treated as a failure or not, because we know the expected
3963  * result.
3964  */
3965  switch (history->execution_status) {
3966  case PCMK_EXEC_DONE:
3967  case PCMK_EXEC_ERROR:
3968  break;
3969 
3970  // These should be treated as node-fatal
3971  case PCMK_EXEC_NO_FENCE_DEVICE:
3972  case PCMK_EXEC_NO_SECRETS:
3973  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3974  "node-fatal error");
3975  goto remap_done;
3976 
3977  default:
3978  goto remap_done;
3979  }
3980 
3981  is_probe = pcmk_xe_is_probe(history->xml);
3982  if (is_probe) {
3983  task = "probe";
3984  }
3985 
3986  if (history->expected_exit_status < 0) {
3987  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3988  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3989  * expected exit status in the transition key, which (along with the
3990  * similar case of a corrupted transition key in the CIB) will be
3991  * reported to this function as -1. Pacemaker 2.0+ does not support
3992  * rolling upgrades from those versions or processing of saved CIB files
3993  * from those versions, so we do not need to care much about this case.
3994  */
3995  remap_because(history, &why, PCMK_EXEC_ERROR,
3996  "obsolete history format");
3997  pcmk__config_warn("Expected result not found for %s on %s "
3998  "(corrupt or obsolete CIB?)",
3999  history->key, pcmk__node_name(history->node));
4000 
4001  } else if (history->exit_status == history->expected_exit_status) {
4002  remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
4003 
4004  } else {
4005  remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
4006  pcmk__rsc_debug(history->rsc,
4007  "%s on %s: expected %d (%s), got %d (%s%s%s)",
4008  history->key, pcmk__node_name(history->node),
4009  history->expected_exit_status,
4010  services_ocf_exitcode_str(history->expected_exit_status),
4011  history->exit_status,
4012  services_ocf_exitcode_str(history->exit_status),
4013  (pcmk__str_empty(history->exit_reason)? "" : ": "),
4014  pcmk__s(history->exit_reason, ""));
4015  }
4016 
4017  switch (history->exit_status) {
4018  case PCMK_OCF_OK:
4019  if (is_probe
4020  && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
4021  char *last_change_s = last_change_str(history->xml);
4022 
4023  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4024  pcmk__rsc_info(history->rsc,
4025  "Probe found %s active on %s at %s",
4026  history->rsc->id, pcmk__node_name(history->node),
4027  last_change_s);
4028  free(last_change_s);
4029  }
4030  break;
4031 
4032  case PCMK_OCF_NOT_RUNNING:
4033  if (is_probe
4034  || (history->expected_exit_status == history->exit_status)
4035  || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
4036 
4037  /* For probes, recurring monitors for the Stopped role, and
4038  * unmanaged resources, "not running" is not considered a
4039  * failure.
4040  */
4041  remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4042  history->rsc->role = pcmk_role_stopped;
4043  *on_fail = pcmk_on_fail_ignore;
4044  pe__set_next_role(history->rsc, pcmk_role_unknown,
4045  "not running");
4046  }
4047  break;
4048 
4049  case PCMK_OCF_RUNNING_PROMOTED:
4050  if (is_probe
4051  && (history->exit_status != history->expected_exit_status)) {
4052  char *last_change_s = last_change_str(history->xml);
4053 
4054  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4055  pcmk__rsc_info(history->rsc,
4056  "Probe found %s active and promoted on %s at %s",
4057  history->rsc->id,
4058  pcmk__node_name(history->node), last_change_s);
4059  free(last_change_s);
4060  }
4061  if (!expired
4062  || (history->exit_status == history->expected_exit_status)) {
4063  history->rsc->role = pcmk_role_promoted;
4064  }
4065  break;
4066 
4067  case PCMK_OCF_FAILED_PROMOTED:
4068  if (!expired) {
4069  history->rsc->role = pcmk_role_promoted;
4070  }
4071  remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4072  break;
4073 
4074  case PCMK_OCF_NOT_CONFIGURED:
4075  remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4076  break;
4077 
4078  case PCMK_OCF_UNIMPLEMENT_FEATURE:
4079  {
4080  guint interval_ms = 0;
4081  crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
4082  &interval_ms);
4083 
4084  if (interval_ms == 0) {
4085  if (!expired) {
4086  block_if_unrecoverable(history);
4087  }
4088  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4089  "exit status");
4090  } else {
4091  remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4092  "exit status");
4093  }
4094  }
4095  break;
4096 
4097  case PCMK_OCF_NOT_INSTALLED:
4098  case PCMK_OCF_INVALID_PARAM:
4099  case PCMK_OCF_INSUFFICIENT_PRIV:
4100  if (!expired) {
4101  block_if_unrecoverable(history);
4102  }
4103  remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4104  break;
4105 
4106  default:
4107  if (history->execution_status == PCMK_EXEC_DONE) {
4108  char *last_change_s = last_change_str(history->xml);
4109 
4110  crm_info("Treating unknown exit status %d from %s of %s "
4111  "on %s at %s as failure",
4112  history->exit_status, task, history->rsc->id,
4113  pcmk__node_name(history->node), last_change_s);
4114  remap_because(history, &why, PCMK_EXEC_ERROR,
4115  "unknown exit status");
4116  free(last_change_s);
4117  }
4118  break;
4119  }
4120 
4121 remap_done:
4122  if (why != NULL) {
4123  pcmk__rsc_trace(history->rsc,
4124  "Remapped %s result from [%s: %s] to [%s: %s] "
4125  "because of %s",
4126  history->key, pcmk_exec_status_str(orig_exec_status),
4127  crm_exit_str(orig_exit_status),
4128  pcmk_exec_status_str(history->execution_status),
4129  crm_exit_str(history->exit_status), why);
4130  }
4131 }
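 /* Editorial example (values assumed): a probe expected to find a resource
  * stopped (target rc 7, PCMK_OCF_NOT_RUNNING) that instead returns 0
  * (PCMK_OCF_OK) first hits the "unexpected result" branch, then the
  * PCMK_OCF_OK case remaps it back to PCMK_EXEC_DONE with reason "probe":
  * discovering the resource running is information, not a failure.
  */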
4132 
4133 // Return TRUE if the last start or monitor failure should be cleared because resource parameters changed
4134 static bool
4135 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4136  pcmk_resource_t *rsc, pcmk_node_t *node)
4137 {
4138  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4139  if (pe__bundle_needs_remote_name(rsc)) {
4140  /* We haven't allocated resources yet, so we can't reliably
4141  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4142  * When that's needed, defer the check until later.
4143  */
4144  pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4145  rsc->cluster);
4146 
4147  } else {
4148  pcmk__op_digest_t *digest_data = NULL;
4149 
4150  digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4151  rsc->cluster);
4152  switch (digest_data->rc) {
4153  case pcmk__digest_unknown:
4154  crm_trace("Resource %s history entry %s on %s"
4155  " has no digest to compare",
4156  rsc->id, pcmk__xe_history_key(xml_op),
4157  node->details->id);
4158  break;
4159  case pcmk__digest_match:
4160  break;
4161  default:
4162  return TRUE;
4163  }
4164  }
4165  }
4166  return FALSE;
4167 }
4168 
4169 // Order action after fencing of remote node, given connection rsc
4170 static void
4171 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4172  pcmk_scheduler_t *scheduler)
4173 {
4174  pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4175 
4176  if (remote_node) {
4177  pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4178  FALSE, scheduler);
4179 
4180  order_actions(fence, action, pcmk__ar_ordered);
4181  }
4182 }
4183 
4184 static bool
4185 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4186  guint interval_ms, bool is_last_failure)
4187 {
4188  /* Clearing failures of recurring monitors has special concerns. The
4189  * executor reports only changes in the monitor result, so if the
4190  * monitor is still active and still getting the same failure result,
4191  * that will go undetected after the failure is cleared.
4192  *
4193  * Also, the operation history will have the time when the recurring
4194  * monitor result changed to the given code, not the time when the
4195  * result last happened.
4196  *
4197  * @TODO We probably should clear such failures only when the failure
4198  * timeout has passed since the last occurrence of the failed result.
4199  * However we don't record that information. We could maybe approximate
4200  * that by clearing only if there is a more recent successful monitor or
4201  * stop result, but we don't even have that information at this point
4202  * since we are still unpacking the resource's operation history.
4203  *
4204  * This is especially important for remote connection resources with a
4205  * reconnect interval, so in that case, we skip clearing failures
4206  * if the remote node hasn't been fenced.
4207  */
4208  if (rsc->remote_reconnect_ms
4209  && pcmk_is_set(rsc->flags, pcmk_rsc_failed)
4210  && (interval_ms != 0)
4211  && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4212 
4213  pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
4214 
4215  if (remote_node && !remote_node->details->remote_was_fenced) {
4216  if (is_last_failure) {
4217  crm_info("Waiting to clear monitor failure for remote node %s"
4218  " until fencing has occurred", rsc->id);
4219  }
4220  return TRUE;
4221  }
4222  }
4223  return FALSE;
4224 }
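 /* Editorial example (assumed configuration): for a remote connection
  * resource with a reconnect interval set, a failed recurring monitor result
  * is deliberately kept past its failure timeout until the remote node has
  * been fenced, since the executor would not re-report an unchanged failure
  * once the history entry was cleared.
  */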
4225 
4244 static bool
4245 check_operation_expiry(struct action_history *history)
4246 {
4247  bool expired = false;
4248  bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4249  time_t last_run = 0;
4250  int unexpired_fail_count = 0;
4251  const char *clear_reason = NULL;
4252 
4253  if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4254  pcmk__rsc_trace(history->rsc,
4255  "Resource history entry %s on %s is not expired: "
4256  "Not Installed does not expire",
4257  history->id, pcmk__node_name(history->node));
4258  return false; // "Not installed" must always be cleared manually
4259  }
4260 
4261  if ((history->rsc->failure_timeout > 0)
4262  && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
4263  &last_run) == 0)) {
4264 
4265  /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4266  * timestamp
4267  */
4268 
4269  time_t now = get_effective_time(history->rsc->cluster);
4270  time_t last_failure = 0;
4271 
4272  // Is this particular operation history older than the failure timeout?
4273  if ((now >= (last_run + history->rsc->failure_timeout))
4274  && !should_ignore_failure_timeout(history->rsc, history->task,
4275  history->interval_ms,
4276  is_last_failure)) {
4277  expired = true;
4278  }
4279 
4280  // Does the resource as a whole have an unexpired fail count?
4281  unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4282  &last_failure,
4283  pcmk__fc_effective,
4284  history->xml);
4285 
4286  // Update scheduler recheck time according to *last* failure
4287  crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4288  " last-failure@%lld",
4289  history->id, (long long) last_run, (expired? "" : "not "),
4290  (long long) now, unexpired_fail_count,
4291  history->rsc->failure_timeout, (long long) last_failure);
4292  last_failure += history->rsc->failure_timeout + 1;
4293  if (unexpired_fail_count && (now < last_failure)) {
4294  pe__update_recheck_time(last_failure, history->rsc->cluster,
4295  "fail count expiration");
4296  }
4297  }
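 /* Editorial worked example (assumed values): with
  * PCMK_META_FAILURE_TIMEOUT=300 seconds and a last failure recorded at
  * epoch 1000, the adjustment above arms a recheck at 1000 + 300 + 1 = 1301;
  * if "now" is earlier than that, pe__update_recheck_time() schedules a new
  * transition for when the fail count is due to expire.
  */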
4298 
4299  if (expired) {
4300  if (pe_get_failcount(history->node, history->rsc, NULL,
4301  pcmk__fc_default, history->xml)) {
4302  // There is a fail count ignoring timeout
4303 
4304  if (unexpired_fail_count == 0) {
4305  // There is no fail count considering timeout
4306  clear_reason = "it expired";
4307 
4308  } else {
4309  /* This operation is old, but there is an unexpired fail count.
4310  * In a properly functioning cluster, this should only be
4311  * possible if this operation is not a failure (otherwise the
4312  * fail count should be expired too), so this is really just a
4313  * failsafe.
4314  */
4315  pcmk__rsc_trace(history->rsc,
4316  "Resource history entry %s on %s is not "
4317  "expired: Unexpired fail count",
4318  history->id, pcmk__node_name(history->node));
4319  expired = false;
4320  }
4321 
4322  } else if (is_last_failure
4323  && (history->rsc->remote_reconnect_ms != 0)) {
4324  /* Clear any expired last failure when reconnect interval is set,
4325  * even if there is no fail count.
4326  */
4327  clear_reason = "reconnect interval is set";
4328  }
4329  }
4330 
4331  if (!expired && is_last_failure
4332  && should_clear_for_param_change(history->xml, history->task,
4333  history->rsc, history->node)) {
4334  clear_reason = "resource parameters have changed";
4335  }
4336 
4337  if (clear_reason != NULL) {
4338  pcmk_action_t *clear_op = NULL;
4339 
4340  // Schedule clearing of the fail count
4341  clear_op = pe__clear_failcount(history->rsc, history->node,
4342  clear_reason, history->rsc->cluster);
4343 
4344  if (pcmk_is_set(history->rsc->cluster->flags,
4345  pcmk_sched_fencing_enabled)
4346  && (history->rsc->remote_reconnect_ms != 0)) {
4347  /* If we're clearing a remote connection due to a reconnect
4348  * interval, we want to wait until any scheduled fencing
4349  * completes.
4350  *
4351  * We could limit this to remote_node->details->unclean, but at
4352  * this point, that's always true (it won't be reliable until
4353  * after unpack_node_history() is done).
4354  */
4355  crm_info("Clearing %s failure will wait until any scheduled "
4356  "fencing of %s completes",
4357  history->task, history->rsc->id);
4358  order_after_remote_fencing(clear_op, history->rsc,
4359  history->rsc->cluster);
4360  }
4361  }
4362 
4363  if (expired && (history->interval_ms == 0)
4364  && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4365  switch (history->exit_status) {
4366  case PCMK_OCF_OK:
4367  case PCMK_OCF_NOT_RUNNING:
4368  case PCMK_OCF_RUNNING_PROMOTED:
4369  case PCMK_OCF_DEGRADED:
4370  case PCMK_OCF_DEGRADED_PROMOTED:
4371  // Don't expire probes that return these values
4372  pcmk__rsc_trace(history->rsc,
4373  "Resource history entry %s on %s is not "
4374  "expired: Probe result",
4375  history->id, pcmk__node_name(history->node));
4376  expired = false;
4377  break;
4378  }
4379  }
4380 
4381  return expired;
4382 }
4383 
4384 int
4385 pe__target_rc_from_xml(const xmlNode *xml_op)
4386 {
4387  int target_rc = 0;
4388  const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4389 
4390  if (key == NULL) {
4391  return -1;
4392  }
4393  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4394  return target_rc;
4395 }
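 /* Editorial note: the transition key decoded above has the form
  * <action-id>:<transition-id>:<target-rc>:<cluster-UUID>, so a missing or
  * unparsable key yields the -1 sentinel that remap_operation() treats as an
  * obsolete history format. The key layout is stated here as an editorial
  * summary of decode_transition_key().
  */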
4396 
4406 static void
4407 update_resource_state(struct action_history *history, int exit_status,
4408  const xmlNode *last_failure,
4409  enum action_fail_response *on_fail)
4410 {
4411  bool clear_past_failure = false;
4412 
4413  if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4414  || (!pcmk__is_bundled(history->rsc)
4415  && pcmk_xe_mask_probe_failure(history->xml))) {
4416  history->rsc->role = pcmk_role_stopped;
4417 
4418  } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4419  clear_past_failure = true;
4420 
4421  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4422  pcmk__str_none)) {
4423  if ((last_failure != NULL)
4424  && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4425  pcmk__str_none)) {
4426  clear_past_failure = true;
4427  }
4428  if (history->rsc->role < pcmk_role_started) {
4429  set_active(history->rsc);
4430  }
4431 
4432  } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4433  history->rsc->role = pcmk_role_started;
4434  clear_past_failure = true;
4435 
4436  } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4437  history->rsc->role = pcmk_role_stopped;
4438  clear_past_failure = true;
4439 
4440  } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4441  pcmk__str_none)) {
4442  history->rsc->role = pcmk_role_promoted;
4443  clear_past_failure = true;
4444 
4445  } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4446  pcmk__str_none)) {
4447  if (*on_fail == pcmk_on_fail_demote) {
4448  /* Demote clears an error only if
4449  * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4450  */
4451  clear_past_failure = true;
4452  }
4453  history->rsc->role = pcmk_role_unpromoted;
4454 
4455  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4456  pcmk__str_none)) {
4457  history->rsc->role = pcmk_role_started;
4458  clear_past_failure = true;
4459 
4460  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4461  pcmk__str_none)) {
4462  unpack_migrate_to_success(history);
4463 
4464  } else if (history->rsc->role < pcmk_role_started) {
4465  pcmk__rsc_trace(history->rsc, "%s active on %s",
4466  history->rsc->id, pcmk__node_name(history->node));
4467  set_active(history->rsc);
4468  }
4469 
4470  if (!clear_past_failure) {
4471  return;
4472  }
4473 
4474  switch (*on_fail) {
4475  case pcmk_on_fail_stop:
4476  case pcmk_on_fail_ban:
4477  case pcmk_on_fail_standby_node:
4478  case pcmk_on_fail_fence_node:
4479  pcmk__rsc_trace(history->rsc,
4480  "%s (%s) is not cleared by a completed %s",
4481  history->rsc->id, pcmk_on_fail_text(*on_fail),
4482  history->task);
4483  break;
4484 
4485  case pcmk_on_fail_block:
4486  case pcmk_on_fail_ignore:
4487  case pcmk_on_fail_demote:
4488  case pcmk_on_fail_restart:
4489  case pcmk_on_fail_restart_container:
4490  *on_fail = pcmk_on_fail_ignore;
4491  pe__set_next_role(history->rsc, pcmk_role_unknown,
4492  "clear past failures");
4493  break;
4494 
4495  case pcmk_on_fail_reset_remote:
4496  if (history->rsc->remote_reconnect_ms == 0) {
4497  /* With no reconnect interval, the connection is allowed to
4498  * start again after the remote node is fenced and
4499  * completely stopped. (With a reconnect interval, we wait
4500  * for the failure to be cleared entirely before attempting
4501  * to reconnect.)
4502  */
4503  *on_fail = pcmk_on_fail_ignore;
4504  pe__set_next_role(history->rsc, pcmk_role_unknown,
4505  "clear past failures and reset remote");
4506  }
4507  break;
4508  }
4509 }
4510 
4519 static inline bool
4520 can_affect_state(struct action_history *history)
4521 {
4522 #if 0
4523  /* @COMPAT It might be better to parse only actions we know we're interested
4524  * in, rather than exclude a couple we don't. However that would be a
4525  * behavioral change that should be done at a major or minor series release.
4526  * Currently, unknown operations can affect whether a resource is considered
4527  * active and/or failed.
4528  */
4529  return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4530  PCMK_ACTION_START, PCMK_ACTION_STOP,
4531  PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4532  PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4533  "asyncmon", NULL);
4534 #else
4535  return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4536  PCMK_ACTION_META_DATA, NULL);
4537 #endif
4538 }
4539 
4548 static int
4549 unpack_action_result(struct action_history *history)
4550 {
4551  if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4552  &(history->execution_status)) < 0)
4553  || (history->execution_status < PCMK_EXEC_PENDING)
4554  || (history->execution_status > PCMK_EXEC_MAX)
4555  || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4556  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4557  "with invalid " PCMK__XA_OP_STATUS " '%s'",
4558  history->id, history->rsc->id,
4559  pcmk__node_name(history->node),
4560  pcmk__s(crm_element_value(history->xml,
4561  PCMK__XA_OP_STATUS),
4562  ""));
4563  return pcmk_rc_unpack_error;
4564  }
4565  if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4566  &(history->exit_status)) < 0)
4567  || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4568 #if 0
4569  /* @COMPAT We should ignore malformed entries, but since that would
4570  * change behavior, it should be done at a major or minor series
4571  * release.
4572  */
4573  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4574  "with invalid " PCMK__XA_RC_CODE " '%s'",
4575  history->id, history->rsc->id,
4576  pcmk__node_name(history->node),
4577  pcmk__s(crm_element_value(history->xml,
4578  PCMK__XA_RC_CODE),
4579  ""));
4580  return pcmk_rc_unpack_error;
4581 #else
4582  history->exit_status = CRM_EX_ERROR;
4583 #endif
4584  }
4585  history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4586  return pcmk_rc_ok;
4587 }
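 /* Editorial example (attribute values assumed): a history entry such as
  *   <lrm_rsc_op id="rsc1_monitor_10000" operation="monitor"
  *               op-status="0" rc-code="7" ... />
  * unpacks above as execution_status PCMK_EXEC_DONE (0) and exit_status
  * PCMK_OCF_NOT_RUNNING (7), leaving interpretation to remap_operation().
  */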
4588 
4599 static int
4600 process_expired_result(struct action_history *history, int orig_exit_status)
4601 {
4602  if (!pcmk__is_bundled(history->rsc)
4603  && pcmk_xe_mask_probe_failure(history->xml)
4604  && (orig_exit_status != history->expected_exit_status)) {
4605 
4606  if (history->rsc->role <= pcmk_role_stopped) {
4607  history->rsc->role = pcmk_role_unknown;
4608  }
4609  crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4610  "Masked failure expired",
4611  history->id, history->rsc->id,
4612  pcmk__node_name(history->node));
4613  return pcmk_rc_ok;
4614  }
4615 
4616  if (history->exit_status == history->expected_exit_status) {
4617  return pcmk_rc_undetermined; // Only failures expire
4618  }
4619 
4620  if (history->interval_ms == 0) {
4621  crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4622  "Expired failure",
4623  history->id, history->task, history->rsc->id,
4624  pcmk__node_name(history->node));
4625  return pcmk_rc_ok;
4626  }
4627 
4628  if (history->node->details->online && !history->node->details->unclean) {
4629  /* Reschedule the recurring action. schedule_cancel() won't work at
4630  * this stage, so as a hacky workaround, forcibly change the restart
4631  * digest so pcmk__check_action_config() does what we want later.
4632  *
4633  * @TODO We should skip this if there is a newer successful monitor.
4634  * Also, this causes rescheduling only if the history entry
4635  * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4636  * scheduler regression test doesn't, but that may not be a
4637  * realistic scenario in production).
4638  */
4639  crm_notice("Rescheduling %s-interval %s of %s on %s "
4640  "after failure expired",
4641  pcmk__readable_interval(history->interval_ms), history->task,
4642  history->rsc->id, pcmk__node_name(history->node));
4643  crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4644  "calculated-failure-timeout");
4645  return pcmk_rc_ok;
4646  }
4647 
4648  return pcmk_rc_undetermined;
4649 }
4650 
4660 static void
4661 mask_probe_failure(struct action_history *history, int orig_exit_status,
4662  const xmlNode *last_failure,
4663  enum action_fail_response *on_fail)
4664 {
4665  pcmk_resource_t *ban_rsc = history->rsc;
4666 
4667  if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4668  ban_rsc = uber_parent(history->rsc);
4669  }
4670 
4671  crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4672  services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4673  pcmk__node_name(history->node));
4674  update_resource_state(history, history->expected_exit_status, last_failure,
4675  on_fail);
4676  crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
4677 
4678  record_failed_op(history);
4679  resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4680  "masked-probe-failure", history->rsc->cluster);
4681 }
4682 
4695 static bool
4696 failure_is_newer(const struct action_history *history,
4697  const xmlNode *last_failure)
4698 {
4699  guint failure_interval_ms = 0U;
4700  long long failure_change = 0LL;
4701  long long this_change = 0LL;
4702 
4703  if (last_failure == NULL) {
4704  return false; // Resource has no last_failure entry
4705  }
4706 
4707  if (!pcmk__str_eq(history->task,
4708  crm_element_value(last_failure, PCMK_XA_OPERATION),
4709  pcmk__str_none)) {
4710  return false; // last_failure is for different action
4711  }
4712 
4713  if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4714  &failure_interval_ms) != pcmk_ok)
4715  || (history->interval_ms != failure_interval_ms)) {
4716  return false; // last_failure is for action with different interval
4717  }
4718 
4719  if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
4720  &this_change, 0LL) != pcmk_rc_ok)
4721  || (pcmk__scan_ll(crm_element_value(last_failure,
4722  PCMK_XA_LAST_RC_CHANGE),
4723  &failure_change, 0LL) != pcmk_rc_ok)
4724  || (failure_change < this_change)) {
4725  return false; // Failure is not known to be newer
4726  }
4727 
4728  return true;
4729 }
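 /* Editorial example (timestamps assumed): if a pending rsc1_monitor_10000
  * entry has last-rc-change=1700000000 and rsc1_last_failure_0 records the
  * same monitor (same interval) with last-rc-change=1700000100, the failure
  * is newer, so process_pending_action() below no longer treats the action
  * as pending.
  */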
4730 
4738 static void
4739 process_pending_action(struct action_history *history,
4740  const xmlNode *last_failure)
4741 {
4742  /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4743  * and there might be a RSC_monitor_INTERVAL entry with the last successful
4744  * or pending result.
4745  *
4746  * If last_failure contains the failure of the pending recurring monitor
4747  * we're processing here, and is newer, the action is no longer pending.
4748  * (Pending results have call ID -1, which sorts last, so the last failure
4749  * if any should be known.)
4750  */
4751  if (failure_is_newer(history, last_failure)) {
4752  return;
4753  }
4754 
4755  if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4756  pcmk__set_rsc_flags(history->rsc, pcmk_rsc_start_pending);
4757  set_active(history->rsc);
4758 
4759  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4760  history->rsc->role = pcmk_role_promoted;
4761 
4762  } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4763  && history->node->details->unclean) {
4764  /* A migrate_to action is pending on an unclean source, so force a stop
4765  * on the target.
4766  */
4767  const char *migrate_target = NULL;
4768  pcmk_node_t *target = NULL;
4769 
4770  migrate_target = crm_element_value(history->xml,
4771  PCMK__META_MIGRATE_TARGET);
4772  target = pcmk_find_node(history->rsc->cluster, migrate_target);
4773  if (target != NULL) {
4774  stop_action(history->rsc, target, FALSE);
4775  }
4776  }
4777 
4778  if (history->rsc->pending_task != NULL) {
4779  /* There should never be multiple pending actions, but as a failsafe,
4780  * just remember the first one processed for display purposes.
4781  */
4782  return;
4783  }
4784 
4785  if (pcmk_is_probe(history->task, history->interval_ms)) {
4786  /* Pending probes are currently never displayed, even if pending
4787  * operations are requested. If we ever want to change that,
4788  * enable the below and the corresponding part of
4789  * native.c:native_pending_task().
4790  */
4791 #if 0
4792  history->rsc->pending_task = strdup("probe");
4793  history->rsc->pending_node = history->node;
4794 #endif
4795  } else {
4796  history->rsc->pending_task = strdup(history->task);
4797  history->rsc->pending_node = history->node;
4798  }
4799 }
4800 
4801 static void
4802 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4803  xmlNode **last_failure, enum action_fail_response *on_fail)
4804 {
4805  int old_rc = 0;
4806  bool expired = false;
4807  pcmk_resource_t *parent = rsc;
4808  enum rsc_role_e fail_role = pcmk_role_unknown;
4809  enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4810 
4811  struct action_history history = {
4812  .rsc = rsc,
4813  .node = node,
4814  .xml = xml_op,
4815  .execution_status = PCMK_EXEC_UNKNOWN,
4816  };
4817 
4818  CRM_CHECK(rsc && node && xml_op, return);
4819 
4820  history.id = pcmk__xe_id(xml_op);
4821  if (history.id == NULL) {
4822  pcmk__config_err("Ignoring resource history entry for %s on %s "
4823  "without ID", rsc->id, pcmk__node_name(node));
4824  return;
4825  }
4826 
4827  // Task and interval
4828  history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4829  if (history.task == NULL) {
4830  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4831  "without " PCMK_XA_OPERATION,
4832  history.id, rsc->id, pcmk__node_name(node));
4833  return;
4834  }
4835  crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4836  if (!can_affect_state(&history)) {
4837  pcmk__rsc_trace(rsc,
4838  "Ignoring resource history entry %s for %s on %s "
4839  "with irrelevant action '%s'",
4840  history.id, rsc->id, pcmk__node_name(node),
4841  history.task);
4842  return;
4843  }
4844 
4845  if (unpack_action_result(&history) != pcmk_rc_ok) {
4846  return; // Error already logged
4847  }
4848 
4849  history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4850  history.key = pcmk__xe_history_key(xml_op);
4851  crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4852 
4853  pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4854  history.id, history.task, history.call_id,
4855  pcmk__node_name(node),
4856  pcmk_exec_status_str(history.execution_status),
4857  crm_exit_str(history.exit_status));
4858 
4859  if (node->details->unclean) {
4860  pcmk__rsc_trace(rsc,
4861  "%s is running on %s, which is unclean (further action "
4862  "depends on value of stop's on-fail attribute)",
4863  rsc->id, pcmk__node_name(node));
4864  }
4865 
4866  expired = check_operation_expiry(&history);
4867  old_rc = history.exit_status;
4868 
4869  remap_operation(&history, on_fail, expired);
4870 
4871  if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4872  goto done;
4873  }
4874 
4875  if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4876  mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4877  goto done;
4878  }
4879 
4880  if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4881  parent = uber_parent(rsc);
4882  }
4883 
4884  switch (history.execution_status) {
4885  case PCMK_EXEC_PENDING:
4886  process_pending_action(&history, *last_failure);
4887  goto done;
4888 
4889  case PCMK_EXEC_DONE:
4890  update_resource_state(&history, history.exit_status, *last_failure,
4891  on_fail);
4892  goto done;
4893 
4894  case PCMK_EXEC_NOT_INSTALLED:
4895  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4896  if (failure_strategy == pcmk_on_fail_ignore) {
4897  crm_warn("Cannot ignore failed %s of %s on %s: "
4898  "Resource agent doesn't exist "
4899  CRM_XS " status=%d rc=%d id=%s",
4900  history.task, rsc->id, pcmk__node_name(node),
4901  history.execution_status, history.exit_status,
4902  history.id);
4903  /* Also for printing it as "FAILED" by marking it as
4904  * pcmk_rsc_failed later
4905  */
4906  *on_fail = pcmk_on_fail_ban;
4907  }
4908  resource_location(parent, node, -PCMK_SCORE_INFINITY,
4909  "hard-error", rsc->cluster);
4910  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4911  last_failure, on_fail);
4912  goto done;
4913 
4914  case PCMK_EXEC_NOT_CONNECTED:
4915  if (pcmk__is_pacemaker_remote_node(node)
4916  && pcmk_is_set(node->details->remote_rsc->flags,
4917  pcmk_rsc_managed)) {
4918  /* We should never get into a situation where a managed remote
4919  * connection resource is considered OK but a resource action
4920  * behind the connection gets a "not connected" status. But as a
4921  * fail-safe in case a bug or unusual circumstances do lead to
4922  * that, ensure the remote connection is considered failed.
4923  */
4924  pcmk__set_rsc_flags(node->details->remote_rsc,
4925  pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
4926  }
4927  break; // Not done, do error handling
4928 
4929  case PCMK_EXEC_ERROR:
4930  case PCMK_EXEC_ERROR_HARD:
4931  case PCMK_EXEC_ERROR_FATAL:
4932  case PCMK_EXEC_TIMEOUT:
4933  case PCMK_EXEC_NOT_SUPPORTED:
4934  case PCMK_EXEC_INVALID:
4935  break; // Not done, do error handling
4936 
4937  default: // No other value should be possible at this point
4938  break;
4939  }
4940 
4941  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4942  if ((failure_strategy == pcmk_on_fail_ignore)
4943  || ((failure_strategy == pcmk_on_fail_restart_container)
4944  && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4945 
4946  char *last_change_s = last_change_str(xml_op);
4947 
4948  crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4949  CRM_XS " %s",
4950  history.task, services_ocf_exitcode_str(history.exit_status),
4951  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4952  pcmk__s(history.exit_reason, ""), rsc->id,
4953  pcmk__node_name(node), last_change_s, history.id);
4954  free(last_change_s);
4955 
4956  update_resource_state(&history, history.expected_exit_status,
4957  *last_failure, on_fail);
4958  crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
4959  pcmk__set_rsc_flags(rsc, pcmk_rsc_failure_ignored);
4960 
4961  record_failed_op(&history);
4962 
4963  if ((failure_strategy == pcmk_on_fail_restart_container)
4964  && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4965  *on_fail = failure_strategy;
4966  }
4967 
4968  } else {
4969  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4970  last_failure, on_fail);
4971 
4972  if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4973  uint8_t log_level = LOG_ERR;
4974 
4975  if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4976  log_level = LOG_NOTICE;
4977  }
4978  do_crm_log(log_level,
4979  "Preventing %s from restarting on %s because "
4980  "of hard failure (%s%s%s) " CRM_XS " %s",
4981  parent->id, pcmk__node_name(node),
4982  services_ocf_exitcode_str(history.exit_status),
4983  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4984  pcmk__s(history.exit_reason, ""), history.id);
4985  resource_location(parent, node, -PCMK_SCORE_INFINITY,
4986  "hard-error", rsc->cluster);
4987 
4988  } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4989  pcmk__sched_err("Preventing %s from restarting anywhere because "
4990  "of fatal failure (%s%s%s) " CRM_XS " %s",
4991  parent->id,
4992  services_ocf_exitcode_str(history.exit_status),
4993  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4994  pcmk__s(history.exit_reason, ""), history.id);
4995  resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
4996  "fatal-error", rsc->cluster);
4997  }
4998  }
4999 
5000 done:
5001  pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
5002  rsc->id, pcmk__node_name(node), history.id,
5003  pcmk_role_text(rsc->role),
5004  pcmk_role_text(rsc->next_role));
5005 }
5006 
5007 static void
5008 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
5009  pcmk_scheduler_t *scheduler)
5010 {
5011  const char *cluster_name = NULL;
5012 
5013  pe_rule_eval_data_t rule_data = {
5014  .node_hash = NULL,
5015  .now = scheduler->now,
5016  .match_data = NULL,
5017  .rsc_data = NULL,
5018  .op_data = NULL
5019  };
5020 
5021  pcmk__insert_dup(node->details->attrs,
5022  CRM_ATTR_UNAME, node->details->uname);
5023 
5024  pcmk__insert_dup(node->details->attrs, CRM_ATTR_ID, node->details->id);
5025  if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
5026  scheduler->dc_node = node;
5027  node->details->is_dc = TRUE;
5028  pcmk__insert_dup(node->details->attrs,
5029  CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
5030  } else {
5031  pcmk__insert_dup(node->details->attrs,
5032  CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
5033  }
5034 
5035  cluster_name = g_hash_table_lookup(scheduler->config_hash,
5036  PCMK_OPT_CLUSTER_NAME);
5037  if (cluster_name) {
5038  pcmk__insert_dup(node->details->attrs, CRM_ATTR_CLUSTER_NAME,
5039  cluster_name);
5040  }
5041 
5042  pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data,
5043  node->details->attrs, NULL, overwrite,
5044  scheduler);
5045 
5046  pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
5047  node->details->utilization, NULL,
5048  FALSE, scheduler);
5049 
5050  if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5051  pcmk__rsc_node_current) == NULL) {
5052  const char *site_name = pcmk__node_attr(node, "site-name", NULL,
5053  pcmk__rsc_node_current);
5054 
5055  if (site_name) {
5056  pcmk__insert_dup(node->details->attrs,
5057  CRM_ATTR_SITE_NAME, site_name);
5058 
5059  } else if (cluster_name) {
5060  /* Default to cluster-name if unset */
5061  pcmk__insert_dup(node->details->attrs,
5062  CRM_ATTR_SITE_NAME, cluster_name);
5063  }
5064  }
5065 }
5066 
5067 static GList *
5068 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5069 {
5070  int counter = -1;
5071  int stop_index = -1;
5072  int start_index = -1;
5073 
5074  xmlNode *rsc_op = NULL;
5075 
5076  GList *gIter = NULL;
5077  GList *op_list = NULL;
5078  GList *sorted_op_list = NULL;
5079 
5080  /* extract operations */
5081  op_list = NULL;
5082  sorted_op_list = NULL;
5083 
5084  for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
5085  rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5086 
5087  if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5088  crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5089  crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5090  op_list = g_list_prepend(op_list, rsc_op);
5091  }
5092  }
5093 
5094  if (op_list == NULL) {
5095  /* if there are no operations, there is nothing to do */
5096  return NULL;
5097  }
5098 
5099  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5100 
5101  /* create active recurring operations as optional */
5102  if (active_filter == FALSE) {
5103  return sorted_op_list;
5104  }
5105 
5106  op_list = NULL;
5107 
5108  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5109 
5110  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5111  xmlNode *rsc_op = (xmlNode *) gIter->data;
5112 
5113  counter++;
5114 
5115  if (start_index < stop_index) {
5116  crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5117  break;
5118 
5119  } else if (counter < start_index) {
5120  crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5121  continue;
5122  }
5123  op_list = g_list_append(op_list, rsc_op);
5124  }
5125 
5126  g_list_free(sorted_op_list);
5127  return op_list;
5128 }
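 /* Editorial note: calculate_active_ops() (defined earlier in this file)
  * locates the newest start and stop in the sorted history; in the loop
  * above, start_index < stop_index means the resource was stopped after its
  * last start (so nothing is active), and entries older than the newest
  * start are skipped. This summary is an editorial reading of the code.
  */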
5129 
5130 GList *
5131 find_operations(const char *rsc, const char *node, gboolean active_filter,
5132  pcmk_scheduler_t *scheduler)
5133 {
5134  GList *output = NULL;
5135  GList *intermediate = NULL;
5136 
5137  xmlNode *tmp = NULL;
5138  xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5139  NULL, NULL);
5140 
5141  pcmk_node_t *this_node = NULL;
5142 
5143  xmlNode *node_state = NULL;
5144 
5145  CRM_CHECK(status != NULL, return NULL);
5146 
5147  for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
5148  node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5149 
5150  if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
5151  const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5152 
5153  if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5154  continue;
5155  }
5156 
5157  this_node = pcmk_find_node(scheduler, uname);
5158  if(this_node == NULL) {
5159  CRM_LOG_ASSERT(this_node != NULL);
5160  continue;
5161 
5162  } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5163  determine_remote_online_status(scheduler, this_node);
5164 
5165  } else {
5166  determine_online_status(node_state, this_node, scheduler);
5167  }
5168 
5169  if (this_node->details->online
5170  || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
5171  /* offline nodes run no resources...
5172  * unless stonith is enabled in which case we need to
5173  * make sure rsc start events happen after the stonith
5174  */
5175  xmlNode *lrm_rsc = NULL;
5176 
5177  tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5178  NULL);
5179  tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
5180  NULL);
5181 
5182  for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
5183  lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5184 
5185  if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5186  const char *rsc_id = crm_element_value(lrm_rsc,
5187  PCMK_XA_ID);
5188 
5189  if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5190  continue;
5191  }
5192 
5193  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5194  output = g_list_concat(output, intermediate);
5195  }
5196  }
5197  }
5198  }
5199  }
5200 
5201  return output;
5202 }
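 /* Editorial usage sketch (hypothetical caller, not part of this file):
  *
  *     GList *ops = find_operations(NULL, NULL, TRUE, scheduler);
  *
  *     for (GList *iter = ops; iter != NULL; iter = iter->next) {
  *         xmlNode *op = iter->data;
  *
  *         crm_debug("Active op %s on %s", pcmk__xe_history_key(op),
  *                   crm_element_value(op, PCMK_XA_UNAME));
  *     }
  *     g_list_free(ops);
  *
  * Passing NULL for rsc and node matches all resources and nodes; TRUE
  * filters the history down to operations still considered active.
  */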
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:1032
GHashTable * tags
Definition: scheduler.h:253
Services API.
pcmk__cpg_host_t host
Definition: cpg.c:52
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:245
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
Definition: utils.c:36
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
Definition: scheduler.c:103
xmlNode * pcmk__xml_copy(xmlNode *parent, xmlNode *src)
Definition: xml.c:883
enum pe_quorum_policy no_quorum_policy
Definition: scheduler.h:217
bool pe__shutdown_requested(const pcmk_node_t *node)
Definition: utils.c:677
A dumping ground.
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
Definition: utils.c:517
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition: utils.c:89
Service failed and possibly in promoted role.
Definition: results.h:193
#define crm_notice(fmt, args...)
Definition: logging.h:397
GHashTable * known_on
Definition: resources.h:459
#define PCMK__XE_LRM_RESOURCES
No connection to executor.
Definition: results.h:341
pcmk_scheduler_t * cluster
Definition: resources.h:408
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
Definition: unpack.c:623
GHashTable * attrs
Definition: nodes.h:142
#define PCMK_XA_NAME
Definition: xml_names.h:325
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition: roles.c:23
gboolean unseen
Definition: nodes.h:93
gboolean fixed
Definition: nodes.h:163
char data[0]
Definition: cpg.c:58
#define PCMK_OPT_STONITH_ENABLED
Definition: options.h:65
#define PCMK__XE_TICKET_STATE
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Definition: resources.h:328
Service active and promoted.
Definition: results.h:192
#define CRM_ATTR_KIND
Definition: crm.h:101
#define ST__LEVEL_MIN
Definition: crm_internal.h:87
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
Definition: clone.c:1283
pcmk_node_t * partial_migration_target
Definition: resources.h:450
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags)
Definition: xml.c:584
#define PCMK_VALUE_FALSE
Definition: options.h:152
#define PCMK__XA_RC_CODE
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
Definition: bundle.c:1402
GHashTable * state
Definition: tickets.h:35
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition: strings.c:127
#define PCMK_XE_STATUS
Definition: xml_names.h:199
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:1007
#define CRM_ATTR_IS_DC
Definition: crm.h:103
#define stop_action(rsc, node, optional)
Definition: internal.h:214
#define PCMK_XE_TEMPLATE
Definition: xml_names.h:206
Stopped.
Definition: roles.h:36
const char * name
Definition: cib.c:26
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1026
#define PCMK_OPT_CONCURRENT_FENCING
Definition: options.h:33
#define XPATH_NODE_STATE
Definition: unpack.c:2927
#define PCMK_XE_PRIMITIVE
Definition: xml_names.h:160
enum rsc_role_e role
Definition: resources.h:464
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition: utils.c:145
#define pcmk__config_warn(fmt...)
GList * children
Definition: resources.h:471
#define pcmk__rsc_trace(rsc, fmt, args...)
pcmk__op_digest_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Definition: pe_digest.c:394
Match only clones and their instances, by either clone or instance ID.
Definition: resources.h:191
gboolean standby
Definition: tickets.h:34
#define PCMK_XA_RESOURCE_DISCOVERY
Definition: xml_names.h:379
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition: utils.c:457
int priority_fencing_delay
Definition: scheduler.h:261
#define pcmk__rsc_info(rsc, fmt, args...)
#define PCMK_OPT_SHUTDOWN_LOCK
Definition: options.h:60
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
Definition: clone.c:247
enum rsc_role_e next_role
Definition: resources.h:465
bool pcmk_is_probe(const char *task, guint interval)
Check whether an action name and interval represent a probe.
Definition: probes.c:30
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
Definition: utils.c:410
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
Definition: pe_actions.c:1646
#define pcmk__config_err(fmt...)
#define PCMK_ACTION_META_DATA
Definition: actions.h:56
#define PCMK_ACTION_MONITOR
Definition: actions.h:60
#define PCMK_XA_EXIT_REASON
Definition: xml_names.h:269
GHashTable * meta
Definition: resources.h:467
#define PCMK_XA_NO_QUORUM_PANIC
Definition: xml_names.h:328
Service safely stopped.
Definition: results.h:190
#define set_config_flag(scheduler, option, flag)
Definition: unpack.c:51
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1281
#define PCMK__XA_RSC_ID
Unspecified error.
Definition: results.h:256
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:301
#define PCMK_ACTION_MIGRATE_TO
Definition: actions.h:59
#define PCMK_XA_INDEX
Definition: xml_names.h:300
gboolean pending
Definition: nodes.h:87
#define SUB_XPATH_LRM_RSC_OP
Definition: unpack.c:2932
Promoted.
Definition: roles.h:39
char * id
Definition: tags.h:30
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: pe_actions.c:1782
#define PCMK_OPT_CLUSTER_NAME
Definition: options.h:31
Necessary CIB secrets are unavailable.
Definition: results.h:344
const char * pcmk_on_fail_text(enum action_fail_response on_fail)
Get string equivalent of a failure handling type.
Definition: actions.c:147
action_fail_response
Definition: actions.h:130
#define PCMK__XA_OP_RESTART_DIGEST
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:228
Service promoted but more likely to fail soon.
Definition: results.h:195
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:458
enum crm_ais_msg_types type
Definition: cpg.c:51
#define CRMD_JOINSTATE_NACK
Definition: crm.h:146
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:104
Ensure crm_exit_t can hold this.
Definition: results.h:320
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:112
GHashTable * tickets
Definition: scheduler.h:222
void pcmk__validate_cluster_options(GHashTable *options)
Definition: options.c:1558
const char * pcmk__cluster_option(GHashTable *options, const char *name)
Definition: options.c:1412
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1551
Action did not complete in time.
Definition: results.h:335
#define PCMK_NODE_ATTR_MAINTENANCE
Definition: nodes.h:30
#define PCMK_OPT_STOP_ORPHAN_ACTIONS
Definition: options.h:70
pcmk_scheduler_t * data_set
Definition: nodes.h:153
pcmk_resource_t * container
Definition: resources.h:476
gboolean remote_was_fenced
Definition: nodes.h:118
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition: nvpair.c:482
Execution failed, do not retry on node.
Definition: results.h:338
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:608
#define PCMK_XA_TYPE
Definition: xml_names.h:425
#define PCMK_META_REMOTE_CONNECT_TIMEOUT
Definition: options.h:107
#define PCMK_OPT_STONITH_ACTION
Definition: options.h:64
#define PCMK_XA_RESOURCE
Definition: xml_names.h:377
#define PCMK_XA_OPERATION
Definition: xml_names.h:344
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
Definition: complex.c:639
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:189
gboolean remote_requires_reset
Definition: nodes.h:112
Action was cancelled.
Definition: results.h:334
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition: unpack.c:847
#define PCMK_XA_STANDBY
Definition: xml_names.h:401
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
Definition: failcounts.c:361
No fence device is configured for target.
Definition: results.h:343
const char * action
Definition: pcmk_fence.c:30
#define PCMK_OPT_ENABLE_STARTUP_PROBES
Definition: options.h:38
#define PCMK_META_REMOTE_ALLOW_MIGRATE
Definition: options.h:106
#define PCMK_META_IS_MANAGED
Definition: options.h:92
GList * resources
Definition: scheduler.h:231
#define PCMK__XE_TRANSIENT_ATTRIBUTES
int pcmk__effective_rc(int rc)
Definition: agents.c:72
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Check whether an action history entry represents a probe.
Definition: probes.c:45
#define PCMK__META_MIGRATE_TARGET
gboolean remote_maintenance
Definition: nodes.h:124
#define PCMK_META_REMOTE_ADDR
Definition: options.h:105
#define pcmk__rsc_debug(rsc, fmt, args...)
#define demote_action(rsc, node, optional)
Definition: internal.h:230
gboolean is_dc
Definition: nodes.h:100
char int pcmk_parse_interval_spec(const char *input, guint *result_ms)
Parse milliseconds from a Pacemaker interval specification.
Definition: strings.c:451
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Definition: resources.h:276
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition: actions.c:426
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:125
#define PCMK_OPT_PLACEMENT_STRATEGY
Definition: options.h:57
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
int weight
Definition: nodes.h:162
#define PCMK__XE_LRM_RESOURCE
#define PCMK__XA_TRANSITION_KEY
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition: strings.c:97
pcmk_resource_t * parent
Definition: resources.h:409
GList * dangling_migrations
Definition: resources.h:474
#define CRMD_JOINSTATE_DOWN
Definition: crm.h:143
Maximum value for this enum.
Definition: results.h:347
#define crm_warn(fmt, args...)
Definition: logging.h:394
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id_rh)
Definition: status.c:430
guint remote_reconnect_ms
Definition: resources.h:423
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition: complex.c:1253
#define PCMK_XE_TAG
Definition: xml_names.h:203
const char * crm_exit_str(crm_exit_t exit_code)
Definition: results.c:640
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:1977
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:539
#define crm_debug(fmt, args...)
Definition: logging.h:402
#define PCMK_XA_UNAME
Definition: xml_names.h:426
#define PCMK_XA_EXPECTED
Definition: xml_names.h:273
Utility functions.
Used only to initialize variables.
Definition: results.h:331
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:446
#define PCMK_OPT_STOP_ALL_RESOURCES
Definition: options.h:69
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1915
xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v)
Definition: xml.c:440
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition: utils.c:359
Parameter invalid (in local context)
Definition: results.h:183
gboolean unpacked
Definition: nodes.h:126
#define pcmk__sched_err(fmt...)
#define PCMK_XE_UTILIZATION
Definition: xml_names.h:212
int char2score(const char *score)
Get the integer value of a score string.
Definition: scores.c:36
Parameter invalid (inherently)
Definition: results.h:187
#define CRM_ATTR_UNAME
Definition: crm.h:99
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Check whether an action history entry represents a maskable probe.
Definition: probes.c:69
#define crm_trace(fmt, args...)
Definition: logging.h:404
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:145
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:181
#define PCMK__VALUE_PING
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1296
bool xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:47
#define PCMK_VALUE_MEMBER
Definition: options.h:169
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:98
Insufficient privileges.
Definition: results.h:185
#define PCMK_OPT_MAINTENANCE_MODE
Definition: options.h:44
#define PCMK_META_REMOTE_NODE
Definition: options.h:108
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear)
const char * stonith_action
Definition: scheduler.h:205
struct pe_node_shared_s * details
Definition: nodes.h:167
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
Definition: bundle.c:920
#define PCMK_OPT_SHUTDOWN_LOCK_LIMIT
Definition: options.h:61
#define crm_log_xml_debug(xml, text)
Definition: logging.h:411
#define PCMK_XE_CLUSTER_PROPERTY_SET
Definition: xml_names.h:84
#define PCMK_ACTION_START
Definition: actions.h:72
unsigned long long flags
Definition: resources.h:428
const char * uname
Definition: nodes.h:73
#define PCMK_VALUE_IGNORE
Definition: options.h:161
Unpromoted.
Definition: roles.h:38
#define PCMK_OPT_PRIORITY_FENCING_DELAY
Definition: options.h:58
void pcmk__str_update(char **str, const char *value)
Definition: strings.c:1277
Wrappers for and extensions to libxml2.
GHashTable * config_hash
Definition: scheduler.h:219
rsc_role_e
Definition: roles.h:34
#define PCMK_OPT_STARTUP_FENCING
Definition: options.h:63
char * clone_name
Definition: resources.h:397
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:627
#define PCMK_META_TARGET_ROLE
Definition: options.h:113
#define ST__LEVEL_MAX
Definition: crm_internal.h:88
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Definition: utils.c:719
time_t lock_time
Definition: resources.h:483
Action completed, result is known.
Definition: results.h:333
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition: nvpair.c:567
#define PCMK_ACTION_STOP
Definition: actions.h:75
#define PCMK__XA_JOIN
Flag has no effect.
Definition: xml_internal.h:438
GHashTable * pe__node_list2table(const GList *list)
Definition: utils.c:115
#define PCMK_NODE_ATTR_TERMINATE
Definition: nodes.h:32
time_t last_granted
Definition: tickets.h:33
#define PCMK_VALUE_TRUE
Definition: options.h:215
#define PCMK_XA_ID
Definition: xml_names.h:296
#define PCMK__ACTION_POWEROFF
#define pcmk__set_rsc_flags(resource, flags_to_set)
Execution failed, do not retry anywhere.
Definition: results.h:339
gboolean standby
Definition: nodes.h:82
#define PCMK__XE_LRM
#define PCMK_NODE_ATTR_STANDBY
Definition: nodes.h:31
#define PCMK_XA_VALUE
Definition: xml_names.h:437
#define PCMK_XA_SCORE
Definition: xml_names.h:391
void pe__free_digests(gpointer ptr)
Definition: pe_digest.c:33
gboolean expected_up
Definition: nodes.h:99
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:196
#define PCMK_OPT_NODE_PENDING_TIMEOUT
Definition: options.h:53
Dependencies not available locally.
Definition: results.h:186
#define PCMK_OPT_START_FAILURE_IS_FATAL
Definition: options.h:62
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
enum pe_obj_types variant
Definition: resources.h:410
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1050
xmlNode * input
Definition: scheduler.h:196
gboolean granted
Definition: tickets.h:32
#define pcmk__str_copy(str)
#define pcmk__warn_once(wo_flag, fmt...)
#define PCMK_XE_TICKETS
Definition: xml_names.h:208
const char * placement_strategy
Definition: scheduler.h:206
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition: utils.c:694
uint32_t id
Definition: cpg.c:48
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
Definition: unpack.c:214
#define PCMK_VALUE_FENCE_LEGACY
Definition: options.h:224
const char * id
Definition: nodes.h:72
char * id
Definition: tickets.h:31
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
Definition: native.c:91
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
Definition: status.c:465
#define PCMK_XE_META_ATTRIBUTES
Definition: xml_names.h:127
guint shutdown_lock
Definition: scheduler.h:260
Unspecified error.
Definition: results.h:181
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
Definition: pe_actions.c:1129
GList * refs
Definition: tags.h:31
const char * target
Definition: pcmk_fence.c:29
GList * fillers
Definition: resources.h:477
GList * running_rsc
Definition: nodes.h:139
gboolean rsc_discovery_enabled
Definition: nodes.h:106
#define CRM_XS
Definition: logging.h:56
#define PCMK_VALUE_ONLINE
Definition: options.h:184
(enum value) Requested action not implemented.
Definition: results.h:184
#define PCMK_OPT_STONITH_TIMEOUT
Definition: options.h:67
int crm_str_to_boolean(const char *s, int *ret)
Definition: strings.c:496
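Unlike crm_is_true() (listed further below), this parser can distinguish an unrecognized string from a valid "false". A sketch, with the 1/-1 return convention stated as an assumption:

    #include <crm/common/util.h>

    static void
    parse_example(void)
    {
        int value = 0;

        // Assumption: returns 1 when the string parses as a boolean, -1 otherwise
        if (crm_str_to_boolean("on", &value) == 1) {
            // value is now TRUE; a "false"-like string would also return 1
            // but set value to FALSE
        }
    }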
const char * localhost
Definition: scheduler.h:251
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
Definition: xpath.c:139
int pe__target_rc_from_xml(const xmlNode *xml_op)
Definition: unpack.c:4385
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Definition: pe_actions.c:132
(enum value) Service active but more likely to fail soon.
Definition: results.h:194
#define PCMK_XE_NODE
Definition: xml_names.h:133
gboolean is_remote_node
Definition: resources.h:431
#define PCMK_META_INTERVAL
Definition: options.h:91
#define PCMK_XA_LAST_RC_CHANGE
Definition: xml_names.h:311
(enum value) Agent does not implement requested action.
Definition: results.h:336
#define PCMK_XE_FENCING_LEVEL
Definition: xml_names.h:114
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:683
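A sketch pairing this constructor with pcmk__insert_dup() (also in this index); passing free for both destructors mirrors how string-keyed tables are typically built in this code base.

    #include <stdlib.h>
    #include <glib.h>

    static void
    build_params(void)
    {
        // Table that frees both keys and values when entries are removed
        GHashTable *params = pcmk__strkey_table(free, free);

        pcmk__insert_dup(params, "timeout", "30s");  // inserts copies of both strings
        g_hash_table_destroy(params);                // releases all keys and values
    }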
pcmk__action_result_t result
Definition: pcmk_fence.c:35
#define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS
Definition: options.h:137
pcmk_rsc_methods_t * fns
Definition: resources.h:412
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
Definition: unpack.c:949
guint node_pending_timeout
Definition: scheduler.h:266
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:295
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition: unpack.c:703
#define PCMK_OPT_SYMMETRIC_CLUSTER
Definition: options.h:72
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
Definition: pe_health.c:24
pcmk_scheduler_t * scheduler
#define PCMK__XE_LRM_RSC_OP
#define CRM_ASSERT(expr)
Definition: results.h:42
pcmk_node_t * lock_node
Definition: resources.h:481
(enum value) Success.
Definition: results.h:178
GHashTable * node_hash
Definition: common.h:46
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
Definition: iso8601.c:2075
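A sketch converting the current time to an allocated string; the meaning of flags value 0 (default formatting) and the caller-frees convention are assumptions.

    #include <stdlib.h>
    #include <time.h>
    #include <crm/crm.h>

    static void
    log_now(void)
    {
        time_t now = time(NULL);
        char *when = pcmk__epoch2str(&now, 0);  // 0: assumed default format

        crm_info("Current time: %s", when);
        free(when);  // assumption: the caller owns the returned string
    }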
#define PCMK_XA_CRMD
Definition: xml_names.h:251
#define PCMK_META_REMOTE_PORT
Definition: options.h:109
(enum value) Action is pending.
Definition: results.h:203
#define PCMK_OPT_STOP_ORPHAN_RESOURCES
Definition: options.h:71
#define PCMK_ACTION_MIGRATE_FROM
Definition: actions.h:58
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:118
#define PCMK__XA_OP_STATUS
#define pcmk__sched_warn(fmt...)
#define PCMK__XA_GRANTED
#define PCMK_META_ON_FAIL
Definition: options.h:98
(enum value) Started.
Definition: roles.h:37
#define PCMK_XE_OBJ_REF
Definition: xml_names.h:142
(struct) Definition: tags.h:29
#define crm_log_xml_info(xml, text)
Definition: logging.h:410
#define PCMK__XA_IN_CCM
GHashTable * utilization
Definition: nodes.h:143
gboolean shutdown
Definition: nodes.h:97
char uname[MAX_NAME]
Definition: cpg.c:53
#define PCMK_ACTION_PROMOTE
Definition: actions.h:66
#define PCMK_OPT_NO_QUORUM_POLICY
Definition: options.h:46
#define PCMK_OPT_HAVE_WATCHDOG
Definition: options.h:40
#define PCMK_XE_GROUP
Definition: xml_names.h:116
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:144
#define PCMK__XE_NODE_STATE
#define PCMK_XA_LAST_GRANTED
Definition: xml_names.h:310
GList * running_on
Definition: resources.h:456
CRM_TRACE_INIT_DATA(pe_status)
(enum value) Agent or dependency not available locally.
Definition: results.h:340
gboolean maintenance
Definition: nodes.h:104
#define pcmk_ok
Definition: results.h:69
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
Definition: pe_actions.c:1628
#define PCMK__XA_CALL_ID
GHashTable * digest_cache
Definition: nodes.h:144
#define pcmk__set_action_flags(action, flags_to_set)
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2624
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
Definition: unpack.c:5131
#define PCMK_VALUE_STOP
Definition: options.h:209
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
Definition: unpack.c:1407
(enum value) Action is in progress.
Definition: results.h:332
#define PCMK__OPT_REMOVE_AFTER_STOP
void destroy_ticket(gpointer data)
Definition: utils.c:505
void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
Definition: unpack.c:922
const char * pcmk__readable_interval(guint interval_ms)
Definition: iso8601.c:2134
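A one-line sketch; the exact rendering (for example "10s" for 10000 ms) and the use of static storage for the return value are assumptions.

    // Return value is assumed to point at static storage: log it, don't free it
    crm_info("Recheck interval: %s", pcmk__readable_interval(10000));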
pcmk_node_t * pending_node
Definition: resources.h:480
#define SUB_XPATH_LRM_RESOURCE
Definition: unpack.c:2929
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
Definition: pe_actions.c:890
gboolean crm_is_true(const char *s)
Definition: strings.c:488
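A minimal sketch; the accepted spellings are the conventional ones for this helper, and NULL is assumed to test false.

    // Conventionally accepts "true", "on", "yes", "y", and "1" (case-insensitive)
    if (crm_is_true("yes")) {
        // ... value was affirmative; crm_is_true(NULL) is assumed FALSE
    }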
#define PCMK__META_CONTAINER
#define CRM_ATTR_SITE_NAME
Definition: crm.h:105
#define PCMK_ACTION_OFF
Definition: actions.h:63
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler)
Definition: unpack.c:455
(enum value) Resource role is unknown.
Definition: roles.h:35
#define PCMK__META_MIGRATE_SOURCE
#define PCMK_VALUE_FREEZE
Definition: options.h:155
unsigned long long flags
Definition: scheduler.h:211
const char * parent
Definition: cib.c:27
(enum value) Action cannot be attempted (e.g. shutdown).
Definition: results.h:342
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
Definition: pe_actions.c:1031
gboolean standby_onfail
Definition: nodes.h:83
xmlNode * pcmk__xe_create(xmlNode *parent, const char *name)
Definition: xml.c:720
#define pcmk__assert_alloc(nmemb, size)
Definition: internal.h:297
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition: utils.c:395
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:39
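A sketch of the alloc/free pairing with xpath_search() (listed above); numXpathResults() is assumed to be available from the same XPath API.

    #include <crm/common/xml.h>

    static int
    count_matches(xmlNode *xml_top)
    {
        // "//node_state" is an illustrative XPath expression
        xmlXPathObjectPtr obj = xpath_search(xml_top, "//node_state");
        int matches = numXpathResults(obj);  // assumed helper from this API

        freeXpathObject(obj);  // always release the search result
        return matches;
    }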
#define PCMK_VALUE_OFFLINE
Definition: options.h:183
#define PCMK_XE_INSTANCE_ATTRIBUTES
Definition: xml_names.h:119
#define CRM_ATTR_ID
Definition: crm.h:100
gboolean unclean
Definition: nodes.h:91
unsigned int timeout
Definition: pcmk_fence.c:32
xmlNode * pcmk__xe_next_same(const xmlNode *node)
Definition: xml.c:2108
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
Definition: remote.c:189
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:193
enum node_type type
Definition: nodes.h:74
#define PCMK_VALUE_REMOTE
Definition: options.h:198
crm_time_t * now
Definition: scheduler.h:198
(enum value) Execution failed, may be retried.
Definition: results.h:337
#define crm_info(fmt, args...)
Definition: logging.h:399
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
Definition: strings.c:701
#define pcmk__set_scheduler_flags(scheduler, flags_to_set)
GHashTable * template_rsc_sets
Definition: scheduler.h:248
#define PCMK_VALUE_DEMOTE
Definition: options.h:145
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
Definition: pe_actions.c:702
pcmk_node_t * dc_node
Definition: scheduler.h:203
#define PCMK__XA_NODE_FENCED
gboolean online
Definition: nodes.h:80
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1955
GList * stop_needed
Definition: scheduler.h:257
#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
Definition: options.h:68
enum pcmk__digest_result rc
#define PCMK__XA_NODE_IN_MAINTENANCE
pcmk_resource_t * remote_rsc
Definition: nodes.h:135
pcmk_node_t * partial_migration_source
Definition: resources.h:453
#define PCMK_ACTION_NOTIFY
Definition: actions.h:62
#define PCMK_SCORE_INFINITY
Integer score used to represent "infinity".
Definition: scores.h:24
GHashTable * allowed_nodes
Definition: resources.h:462
(enum value) Where resource is running.