pacemaker  2.1.9-49aab99839
Scalable High-Availability cluster resource manager
unpack.c
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 #include <time.h>
16 
17 #include <crm/crm.h>
18 #include <crm/services.h>
19 #include <crm/common/xml.h>
20 #include <crm/common/xml_internal.h>
21 
22 #include <crm/common/util.h>
23 #include <crm/pengine/rules.h>
24 #include <crm/pengine/internal.h>
25 #include <pe_status_private.h>
26 
27 CRM_TRACE_INIT_DATA(pe_status);
28 
29 // A (parsed) resource action history entry
30 struct action_history {
31  pcmk_resource_t *rsc; // Resource that history is for
32  pcmk_node_t *node; // Node that history is for
33  xmlNode *xml; // History entry XML
34 
35  // Parsed from entry XML
36  const char *id; // XML ID of history entry
37  const char *key; // Operation key of action
38  const char *task; // Action name
39  const char *exit_reason; // Exit reason given for result
40  guint interval_ms; // Action interval
41  int call_id; // Call ID of action
42  int expected_exit_status; // Expected exit status of action
43  int exit_status; // Actual exit status of action
44  int execution_status; // Execution status of action
45 };
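/* Illustrative example (not part of this file; attribute names approximate):
 * one struct action_history is parsed from a CIB history entry such as
 *
 *   <lrm_rsc_op id="myrsc_monitor_10000" operation_key="myrsc_monitor_10000"
 *               operation="monitor" call-id="12" interval="10000"
 *               rc-code="0" op-status="0" exit-reason=""/>
 *
 * giving task "monitor", interval_ms 10000, call_id 12, exit_status 0
 * (success), and execution_status 0 (done).
 */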
46 
47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
48  * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
49  * flag is stringified more readably in log messages.
50  */
51 #define set_config_flag(scheduler, option, flag) do { \
52  GHashTable *config_hash = (scheduler)->config_hash; \
53  const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
54  \
55  if (scf_value != NULL) { \
56  if (crm_is_true(scf_value)) { \
57  (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
58  LOG_TRACE, "Scheduler", \
59  crm_system_name, (scheduler)->flags, \
60  (flag), #flag); \
61  } else { \
62  (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
63  LOG_TRACE, "Scheduler", \
64  crm_system_name, (scheduler)->flags, \
65  (flag), #flag); \
66  } \
67  } \
68  } while(0)
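/* Illustrative usage sketch (mirrors real calls later in this file): the
 * macro reads a cluster option from the unpacked configuration and sets or
 * clears the named scheduler flag, logging the flag by name:
 *
 *   set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
 *                   pcmk_sched_fencing_enabled);
 *
 * A true value sets pcmk_sched_fencing_enabled in scheduler->flags, any
 * other defined value clears it, and an unset option leaves flags unchanged.
 */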
69 
70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71  xmlNode *xml_op, xmlNode **last_failure,
72  enum action_fail_response *failed);
73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74  pcmk_node_t *this_node);
75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76  bool overwrite, pcmk_scheduler_t *scheduler);
77 static void determine_online_status(const xmlNode *node_state,
78  pcmk_node_t *this_node,
79  pcmk_scheduler_t *scheduler);
80 
81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
82  pcmk_scheduler_t *scheduler);
83 
84 
85 static gboolean
86 is_dangling_guest_node(pcmk_node_t *node)
87 {
88  /* we are looking for a remote-node that was supposed to be mapped to a
89  * container resource, but all traces of that container have disappeared
90  * from both the config and the status section. */
91  if (pcmk__is_pacemaker_remote_node(node)
92  && (node->details->remote_rsc != NULL)
93  && (node->details->remote_rsc->container == NULL)
94  && pcmk_is_set(node->details->remote_rsc->flags,
95  pcmk_rsc_removed_filler)) {
96  return TRUE;
97  }
98 
99  return FALSE;
100 }
101 
111 void
112 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
113  const char *reason, bool priority_delay)
114 {
115  CRM_CHECK(node, return);
116 
117  /* A guest node is fenced by marking its container as failed */
118  if (pcmk__is_guest_or_bundle_node(node)) {
119  pcmk_resource_t *rsc = node->details->remote_rsc->container;
120 
121  if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
122  if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
123  crm_notice("Not fencing guest node %s "
124  "(otherwise would because %s): "
125  "its guest resource %s is unmanaged",
126  pcmk__node_name(node), reason, rsc->id);
127  } else {
128  pcmk__sched_warn("Guest node %s will be fenced "
129  "(by recovering its guest resource %s): %s",
130  pcmk__node_name(node), rsc->id, reason);
131 
132  /* We don't mark the node as unclean because that would prevent the
133  * node from running resources. We want to allow it to run resources
134  * in this transition if the recovery succeeds.
135  */
136  node->details->remote_requires_reset = TRUE;
137  pcmk__set_rsc_flags(rsc,
138  pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
139  }
140  }
141 
142  } else if (is_dangling_guest_node(node)) {
143  crm_info("Cleaning up dangling connection for guest node %s: "
144  "fencing was already done because %s, "
145  "and guest resource no longer exists",
146  pcmk__node_name(node), reason);
147  pcmk__set_rsc_flags(node->details->remote_rsc,
148  pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
149 
150  } else if (pcmk__is_remote_node(node)) {
151  pcmk_resource_t *rsc = node->details->remote_rsc;
152 
153  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
154  crm_notice("Not fencing remote node %s "
155  "(otherwise would because %s): connection is unmanaged",
156  pcmk__node_name(node), reason);
157  } else if(node->details->remote_requires_reset == FALSE) {
158  node->details->remote_requires_reset = TRUE;
159  pcmk__sched_warn("Remote node %s %s: %s",
160  pcmk__node_name(node),
161  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
162  reason);
163  }
164  node->details->unclean = TRUE;
165  // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
166  pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
167 
168  } else if (node->details->unclean) {
169  crm_trace("Cluster node %s %s because %s",
170  pcmk__node_name(node),
171  pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
172  reason);
173 
174  } else {
175  pcmk__sched_warn("Cluster node %s %s: %s",
176  pcmk__node_name(node),
177  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
178  reason);
179  node->details->unclean = TRUE;
180  pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
181  }
182 }
183 
184 // @TODO xpaths can't handle templates, rules, or id-refs
185 
186 // nvpair with provides or requires set to unfencing
187 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
188  "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \
189  "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
190  "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
191 
192 // unfencing in rsc_defaults or any resource
193 #define XPATH_ENABLE_UNFENCING \
194  "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
195  "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
196  "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
197  "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
198 
199 static void
200 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
201 {
202  xmlXPathObjectPtr result = NULL;
203 
204  if (!pcmk_is_set(scheduler->flags, flag)) {
205  result = xpath_search(scheduler->input, xpath);
206  if (result && (numXpathResults(result) > 0)) {
207  pcmk__set_scheduler_flags(scheduler, flag);
208  }
209  freeXpathObject(result);
210  }
211 }
212 
213 gboolean
214 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
215 {
216  const char *value = NULL;
217  guint interval_ms = 0U;
218  GHashTable *config_hash = pcmk__strkey_table(free, free);
219 
220  pe_rule_eval_data_t rule_data = {
221  .node_hash = NULL,
222  .now = scheduler->now,
223  .match_data = NULL,
224  .rsc_data = NULL,
225  .op_data = NULL
226  };
227 
228  scheduler->config_hash = config_hash;
229 
230  pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
231  config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
232  FALSE, scheduler);
233 
234  pcmk__validate_cluster_options(config_hash);
235 
236  set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
237  pcmk_sched_probe_resources);
238  if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
239  crm_info("Startup probes: disabled (dangerous)");
240  }
241 
242  value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
243  if (value && crm_is_true(value)) {
244  crm_info("Watchdog-based self-fencing will be performed via SBD if "
245  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
246  " is nonzero");
247  pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_fencing);
248  }
249 
250  /* Set certain flags via xpath here, so they can be used before the relevant
251  * configuration sections are unpacked.
252  */
253  set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
254  scheduler);
255 
256  value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
257  pcmk_parse_interval_spec(value, &interval_ms);
258 
259  if (interval_ms >= INT_MAX) {
260  scheduler->stonith_timeout = INT_MAX;
261  } else {
262  scheduler->stonith_timeout = (int) interval_ms;
263  }
264  crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
265 
266  set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
267  pcmk_sched_fencing_enabled);
268  if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
269  crm_debug("STONITH of failed nodes is enabled");
270  } else {
271  crm_debug("STONITH of failed nodes is disabled");
272  }
273 
274  scheduler->stonith_action = pcmk__cluster_option(config_hash,
275  PCMK_OPT_STONITH_ACTION);
276  if (pcmk__str_eq(scheduler->stonith_action, PCMK__ACTION_POWEROFF, pcmk__str_none)) {
277  pcmk__warn_once(pcmk__wo_poweroff,
278  "Support for " PCMK_OPT_STONITH_ACTION " of "
279  "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
280  "removed in a future release "
281  "(use '" PCMK_ACTION_OFF "' instead)");
282  scheduler->stonith_action = PCMK_ACTION_OFF;
283  }
284  crm_trace("STONITH will %s nodes", scheduler->stonith_action);
285 
286  set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
287  pcmk_sched_concurrent_fencing);
288  if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
289  crm_debug("Concurrent fencing is enabled");
290  } else {
291  crm_debug("Concurrent fencing is disabled");
292  }
293 
294  value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
295  if (value) {
296  pcmk_parse_interval_spec(value, &interval_ms);
297  scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
298  crm_trace("Priority fencing delay is %ds",
299  scheduler->priority_fencing_delay);
300  }
301 
302  set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
303  pcmk_sched_stop_all);
304  crm_debug("Stop all active resources: %s",
305  pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
306 
307  set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
308  pcmk_sched_symmetric_cluster);
309  if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
310  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
311  }
312 
313  value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
314 
315  if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
316  scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
317 
318  } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
319  scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
320 
321  } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
322  scheduler->no_quorum_policy = pcmk_no_quorum_demote;
323 
324  } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
325  PCMK_VALUE_FENCE_LEGACY, NULL)) {
326  if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
327  int do_panic = 0;
328 
329  crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
330  &do_panic);
331  if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
332  scheduler->no_quorum_policy = pcmk_no_quorum_fence;
333  } else {
334  crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
335  " to 'stop': cluster has never had quorum");
336  scheduler->no_quorum_policy = pcmk_no_quorum_stop;
337  }
338  } else {
339  pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
340  " to 'stop' because fencing is disabled");
341  scheduler->no_quorum_policy = pcmk_no_quorum_stop;
342  }
343 
344  } else {
345  scheduler->no_quorum_policy = pcmk_no_quorum_stop;
346  }
347 
348  switch (scheduler->no_quorum_policy) {
349  case pcmk_no_quorum_freeze:
350  crm_debug("On loss of quorum: Freeze resources");
351  break;
352  case pcmk_no_quorum_stop:
353  crm_debug("On loss of quorum: Stop ALL resources");
354  break;
355  case pcmk_no_quorum_demote:
356  crm_debug("On loss of quorum: "
357  "Demote promotable resources and stop other resources");
358  break;
359  case pcmk_no_quorum_fence:
360  crm_notice("On loss of quorum: Fence all remaining nodes");
361  break;
362  case pcmk_no_quorum_ignore:
363  crm_notice("On loss of quorum: Ignore");
364  break;
365  }
366 
367  set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
368  pcmk_sched_stop_removed_resources);
369  if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
370  crm_trace("Orphan resources are stopped");
371  } else {
372  crm_trace("Orphan resources are ignored");
373  }
374 
375  set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
376  pcmk_sched_cancel_removed_actions);
377  if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
378  crm_trace("Orphan resource actions are stopped");
379  } else {
380  crm_trace("Orphan resource actions are ignored");
381  }
382 
383  value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
384  if (value != NULL) {
385  if (crm_is_true(value)) {
386  pcmk__set_scheduler_flags(scheduler, pcmk_sched_remove_after_stop);
387  pcmk__warn_once(pcmk__wo_remove_after,
388  "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
389  " cluster property is deprecated and will be "
390  "removed in a future release");
391  } else {
392  pcmk__clear_scheduler_flags(scheduler,
393  pcmk_sched_remove_after_stop);
394  }
395  }
396 
397  set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
398  pcmk_sched_in_maintenance);
399  crm_trace("Maintenance mode: %s",
400  pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
401 
402  set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
403  pcmk_sched_start_failure_fatal);
404  if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
405  crm_trace("Start failures are always fatal");
406  } else {
407  crm_trace("Start failures are handled by failcount");
408  }
409 
410  if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
411  set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
412  pcmk_sched_startup_fencing);
413  }
414  if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
415  crm_trace("Unseen nodes will be fenced");
416  } else {
417  pcmk__warn_once(pcmk__wo_blind,
418  "Blind faith: not fencing unseen nodes");
419  }
420 
421  pe__unpack_node_health_scores(scheduler);
422 
423  scheduler->placement_strategy =
424  pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
425  crm_trace("Placement strategy: %s", scheduler->placement_strategy);
426 
427  set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
428  pcmk_sched_shutdown_lock);
429  if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
430  value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
431  pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock));
432  scheduler->shutdown_lock /= 1000;
433  crm_trace("Resources will be locked to nodes that were cleanly "
434  "shut down (locks expire after %s)",
435  pcmk__readable_interval(scheduler->shutdown_lock));
436  } else {
437  crm_trace("Resources will not be locked to nodes that were cleanly "
438  "shut down");
439  }
440 
441  value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
442  pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout));
443  scheduler->node_pending_timeout /= 1000;
444  if (scheduler->node_pending_timeout == 0) {
445  crm_trace("Do not fence pending nodes");
446  } else {
447  crm_trace("Fence pending nodes after %s",
448  pcmk__readable_interval(scheduler->node_pending_timeout
449  * 1000));
450  }
451 
452  return TRUE;
453 }
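/* Illustrative sketch (not part of this file) of the interval parsing used
 * above: pcmk_parse_interval_spec() accepts a number with a time unit (or an
 * ISO 8601 duration) and yields milliseconds, for example:
 *
 *   guint ms = 0U;
 *
 *   pcmk_parse_interval_spec("90s", &ms);  // ms == 90000
 *   pcmk_parse_interval_spec("2m", &ms);   // ms == 120000
 *
 * which is why unpack_config() divides shutdown_lock and
 * node_pending_timeout by 1000 to store whole seconds.
 */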
454 
469 pcmk_node_t *
470 pe_create_node(const char *id, const char *uname, const char *type,
471  int score, pcmk_scheduler_t *scheduler)
472 {
473  pcmk_node_t *new_node = NULL;
474 
475  if (pcmk_find_node(scheduler, uname) != NULL) {
476  pcmk__config_warn("More than one node entry has name '%s'", uname);
477  }
478 
479  new_node = calloc(1, sizeof(pcmk_node_t));
480  if (new_node == NULL) {
481  pcmk__sched_err("Could not allocate memory for node %s", uname);
482  return NULL;
483  }
484 
485  new_node->weight = score;
486  new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
487 
488  if (new_node->details == NULL) {
489  free(new_node);
490  pcmk__sched_err("Could not allocate memory for node %s", uname);
491  return NULL;
492  }
493 
494  crm_trace("Creating node for entry %s/%s", uname, id);
495  new_node->details->id = id;
496  new_node->details->uname = uname;
497  new_node->details->online = FALSE;
498  new_node->details->shutdown = FALSE;
499  new_node->details->rsc_discovery_enabled = TRUE;
500  new_node->details->running_rsc = NULL;
501  new_node->details->data_set = scheduler;
502 
503  if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
504  pcmk__str_null_matches|pcmk__str_casei)) {
505  new_node->details->type = pcmk_node_variant_cluster;
506 
507  } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
508  new_node->details->type = pcmk_node_variant_remote;
509  pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_remote_nodes);
510 
511  } else {
512  /* @COMPAT 'ping' is the default for backward compatibility, but it
513  * should be changed to 'member' at a compatibility break
514  */
515  if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
516  pcmk__config_warn("Node %s has unrecognized type '%s', "
517  "assuming '" PCMK__VALUE_PING "'",
518  pcmk__s(uname, "without name"), type);
519  }
520  pcmk__warn_once(pcmk__wo_ping_node,
521  "Support for nodes of type '" PCMK__VALUE_PING "' "
522  "(such as %s) is deprecated and will be removed in a "
523  "future release",
524  pcmk__s(uname, "unnamed node"));
525  new_node->details->type = node_ping;
526  }
527 
528  new_node->details->attrs = pcmk__strkey_table(free, free);
529 
530  if (pcmk__is_pacemaker_remote_node(new_node)) {
531  pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
532  } else {
533  pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
534  }
535 
536  new_node->details->utilization = pcmk__strkey_table(free, free);
537  new_node->details->digest_cache = pcmk__strkey_table(free,
538  pe__free_digests);
539 
540  scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
541  pe__cmp_node_name);
542  return new_node;
543 }
544 
545 static const char *
546 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
547 {
548  xmlNode *attr_set = NULL;
549  xmlNode *attr = NULL;
550 
551  const char *container_id = pcmk__xe_id(xml_obj);
552  const char *remote_name = NULL;
553  const char *remote_server = NULL;
554  const char *remote_port = NULL;
555  const char *connect_timeout = "60s";
556  const char *remote_allow_migrate=NULL;
557  const char *is_managed = NULL;
558 
559  for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
560  attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
561 
562  if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) {
563  continue;
564  }
565 
566  for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
567  attr != NULL; attr = pcmk__xe_next(attr)) {
568 
569  const char *value = crm_element_value(attr, PCMK_XA_VALUE);
570  const char *name = crm_element_value(attr, PCMK_XA_NAME);
571 
572  if (name == NULL) { // Sanity
573  continue;
574  }
575 
576  if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
577  remote_name = value;
578 
579  } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
580  remote_server = value;
581 
582  } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
583  remote_port = value;
584 
585  } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
586  connect_timeout = value;
587 
588  } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
589  remote_allow_migrate = value;
590 
591  } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
592  is_managed = value;
593  }
594  }
595  }
596 
597  if (remote_name == NULL) {
598  return NULL;
599  }
600 
601  if (pe_find_resource(data->resources, remote_name) != NULL) {
602  return NULL;
603  }
604 
605  pe_create_remote_xml(parent, remote_name, container_id,
606  remote_allow_migrate, is_managed,
607  connect_timeout, remote_server, remote_port);
608  return remote_name;
609 }
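/* Illustrative example (not part of this file): a guest node is declared by
 * meta-attributes on an ordinary primitive. For XML such as
 *
 *   <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *     <meta_attributes id="vm1-meta">
 *       <nvpair id="vm1-rn" name="remote-node" value="guest1"/>
 *       <nvpair id="vm1-ra" name="remote-addr" value="192.168.122.10"/>
 *     </meta_attributes>
 *   </primitive>
 *
 * expand_remote_rsc_meta() would return "guest1" and inject a matching
 * ocf:pacemaker:remote connection primitive via pe_create_remote_xml().
 */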
610 
611 static void
612 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
613 {
614  if ((new_node->details->type == pcmk_node_variant_remote)
615  && (new_node->details->remote_rsc == NULL)) {
616  /* Ignore fencing for remote nodes that don't have a connection resource
617  * associated with them. This happens when remote node entries get left
618  * in the nodes section after the connection resource is removed.
619  */
620  return;
621  }
622 
623  if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
624  // All nodes are unclean until we've seen their status entry
625  new_node->details->unclean = TRUE;
626 
627  } else {
628  // Blind faith ...
629  new_node->details->unclean = FALSE;
630  }
631 
632  /* We need to be able to determine if a node's status section
633  * exists or not separate from whether the node is unclean. */
634  new_node->details->unseen = TRUE;
635 }
636 
637 gboolean
638 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
639 {
640  xmlNode *xml_obj = NULL;
641  pcmk_node_t *new_node = NULL;
642  const char *id = NULL;
643  const char *uname = NULL;
644  const char *type = NULL;
645 
646  for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
647  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
648 
649  if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) {
650  int score = 0;
651  int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
652 
653  new_node = NULL;
654 
655  id = crm_element_value(xml_obj, PCMK_XA_ID);
656  uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
657  type = crm_element_value(xml_obj, PCMK_XA_TYPE);
658  crm_trace("Processing node %s/%s", uname, id);
659 
660  if (id == NULL) {
661  pcmk__config_err("Ignoring <" PCMK_XE_NODE
662  "> entry in configuration without id");
663  continue;
664  }
665  if (rc != pcmk_rc_ok) {
666  // Not possible with schema validation enabled
667  pcmk__config_warn("Using 0 as score for node %s "
668  "because '%s' is not a valid score: %s",
669  pcmk__s(uname, "without name"),
670  crm_element_value(xml_obj, PCMK_XA_SCORE),
671  pcmk_rc_str(rc));
672  }
673  new_node = pe_create_node(id, uname, type, score, scheduler);
674 
675  if (new_node == NULL) {
676  return FALSE;
677  }
678 
679  handle_startup_fencing(scheduler, new_node);
680 
681  add_node_attrs(xml_obj, new_node, FALSE, scheduler);
682 
683  crm_trace("Done with node %s",
684  crm_element_value(xml_obj, PCMK_XA_UNAME));
685  }
686  }
687 
688  if (scheduler->localhost
689  && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) {
690  crm_info("Creating a fake local node");
691  pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
692  scheduler);
693  }
694 
695  return TRUE;
696 }
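/* Illustrative example (not part of this file): unpack_nodes() consumes the
 * CIB <nodes> section, for example
 *
 *   <nodes>
 *     <node id="1" uname="node1"/>
 *     <node id="2" uname="node2" type="member" score="100"/>
 *   </nodes>
 *
 * creating one pcmk_node_t per entry via pe_create_node() above.
 */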
697 
698 static void
699 setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
700 {
701  const char *container_id = NULL;
702 
703  if (rsc->children) {
704  g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
705  return;
706  }
707 
708  container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
709  if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
710  pcmk_resource_t *container = pe_find_resource(scheduler->resources,
711  container_id);
712 
713  if (container) {
714  rsc->container = container;
715  pcmk__set_rsc_flags(container, pcmk_rsc_has_filler);
716  container->fillers = g_list_append(container->fillers, rsc);
717  pcmk__rsc_trace(rsc, "Resource %s's container is %s",
718  rsc->id, container_id);
719  } else {
720  pcmk__config_err("Resource %s: Unknown resource container (%s)",
721  rsc->id, container_id);
722  }
723  }
724 }
725 
726 gboolean
727 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
728 {
729  xmlNode *xml_obj = NULL;
730 
731  /* Create remote nodes and guest nodes from the resource configuration
732  * before unpacking resources.
733  */
734  for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
735  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
736 
737  const char *new_node_id = NULL;
738 
739  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
740  * primitives.
741  */
742  if (xml_contains_remote_node(xml_obj)) {
743  new_node_id = pcmk__xe_id(xml_obj);
744  /* The pcmk_find_node() check ensures we don't iterate over an
745  * expanded node that has already been added to the node list
746  */
747  if (new_node_id
748  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
749  crm_trace("Found remote node %s defined by resource %s",
750  new_node_id, pcmk__xe_id(xml_obj));
751  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
752  0, scheduler);
753  }
754  continue;
755  }
756 
757  /* Check for guest nodes, which are defined by special meta-attributes
758  * of a primitive of any type (for example, VirtualDomain or Xen).
759  */
760  if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
761  /* This will add an ocf:pacemaker:remote primitive to the
762  * configuration for the guest node's connection, to be unpacked
763  * later.
764  */
765  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
766  scheduler);
767  if (new_node_id
768  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
769  crm_trace("Found guest node %s in resource %s",
770  new_node_id, pcmk__xe_id(xml_obj));
771  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
772  0, scheduler);
773  }
774  continue;
775  }
776 
777  /* Check for guest nodes inside a group. Clones are currently not
778  * supported as guest nodes.
779  */
780  if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
781  xmlNode *xml_obj2 = NULL;
782  for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
783  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) {
784 
785  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
786  scheduler);
787 
788  if (new_node_id
789  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
790  crm_trace("Found guest node %s in resource %s inside group %s",
791  new_node_id, pcmk__xe_id(xml_obj2),
792  pcmk__xe_id(xml_obj));
793  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
794  0, scheduler);
795  }
796  }
797  }
798  }
799  return TRUE;
800 }
801 
802 /* Call this after all the nodes and resources have been
803  * unpacked, but before the status section is read.
804  *
805  * A remote node's online status is reflected by the state
806  * of the remote node's connection resource. We need to link
807  * the remote node to this connection resource so we can have
808  * easy access to the connection resource during the scheduler calculations.
809  */
810 static void
811 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
812 {
813  pcmk_node_t *remote_node = NULL;
814 
815  if (new_rsc->is_remote_node == FALSE) {
816  return;
817  }
818 
819  if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
820  /* remote_nodes and remote_resources are not linked in quick location calculations */
821  return;
822  }
823 
824  remote_node = pcmk_find_node(scheduler, new_rsc->id);
825  CRM_CHECK(remote_node != NULL, return);
826 
827  pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
828  new_rsc->id, pcmk__node_name(remote_node));
829  remote_node->details->remote_rsc = new_rsc;
830 
831  if (new_rsc->container == NULL) {
832  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
833  * the same as is done for cluster nodes.
834  */
835  handle_startup_fencing(scheduler, remote_node);
836 
837  } else {
838  /* pe_create_node() marks the new node as "remote" or "cluster"; now
839  * that we know the node is a guest node, update it correctly.
840  */
841  pcmk__insert_dup(remote_node->details->attrs,
842  CRM_ATTR_KIND, "container");
843  }
844 }
845 
846 static void
847 destroy_tag(gpointer data)
848 {
849  pcmk_tag_t *tag = data;
850 
851  if (tag) {
852  free(tag->id);
853  g_list_free_full(tag->refs, free);
854  free(tag);
855  }
856 }
857 
870 gboolean
871 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
872 {
873  xmlNode *xml_obj = NULL;
874  GList *gIter = NULL;
875 
876  scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
877 
878  for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
879  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
880 
881  pcmk_resource_t *new_rsc = NULL;
882  const char *id = pcmk__xe_id(xml_obj);
883 
884  if (pcmk__str_empty(id)) {
885  pcmk__config_err("Ignoring <%s> resource without ID",
886  xml_obj->name);
887  continue;
888  }
889 
890  if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
891  if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
892  NULL, NULL) == FALSE) {
893  /* Record the template's ID for the knowledge of its existence anyway. */
894  g_hash_table_insert(scheduler->template_rsc_sets, strdup(id), NULL);
895  }
896  continue;
897  }
898 
899  crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
900  if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
901  scheduler) == pcmk_rc_ok) {
902  scheduler->resources = g_list_append(scheduler->resources, new_rsc);
903  pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
904 
905  } else {
906  pcmk__config_err("Ignoring <%s> resource '%s' "
907  "because configuration is invalid",
908  xml_obj->name, id);
909  }
910  }
911 
912  for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
913  pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
914 
915  setup_container(rsc, scheduler);
916  link_rsc2remotenode(scheduler, rsc);
917  }
918 
919  scheduler->resources = g_list_sort(scheduler->resources,
920  pe__cmp_rsc_priority);
921  if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
922  /* Ignore */
923 
924  } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
925  && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
926 
927  pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
928  pcmk__config_err("Either configure some or disable STONITH with the "
929  PCMK_OPT_STONITH_ENABLED " option");
930  pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
931  }
932 
933  return TRUE;
934 }
935 
945 void
946 pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
947 {
948  xmlNode *xml_obj = NULL;
949  int id = 0;
950 
951  for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL);
952  xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) {
953 
954  crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id);
955 
956  // Ensure an ID was given
957  if (pcmk__str_empty(pcmk__xe_id(xml_obj))) {
958  pcmk__config_warn("Ignoring registration for topology level without ID");
959  continue;
960  }
961 
962  // Ensure level ID is in allowed range
963  if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) {
964  pcmk__config_warn("Ignoring topology registration with invalid level %d",
965  id);
966  continue;
967  }
968 
969  }
970 }
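/* Illustrative example (not part of this file): a fencing topology section
 * as processed by the loop above:
 *
 *   <fencing-topology>
 *     <fencing-level id="fl1" target="node1" index="1" devices="fence_ipmi"/>
 *     <fencing-level id="fl2" target="node1" index="2" devices="fence_pdu"/>
 *   </fencing-topology>
 *
 * Entries without an ID, or with an index outside
 * ST__LEVEL_MIN..ST__LEVEL_MAX, are ignored with a warning.
 */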
971 
972 gboolean
973 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
974 {
975  xmlNode *xml_tag = NULL;
976 
977  scheduler->tags = pcmk__strkey_table(free, destroy_tag);
978 
979  for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
980  xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
981 
982  xmlNode *xml_obj_ref = NULL;
983  const char *tag_id = pcmk__xe_id(xml_tag);
984 
985  if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) {
986  continue;
987  }
988 
989  if (tag_id == NULL) {
990  pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
991  (const char *) xml_tag->name);
992  continue;
993  }
994 
995  for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
996  xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
997 
998  const char *obj_ref = pcmk__xe_id(xml_obj_ref);
999 
1000  if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
1001  continue;
1002  }
1003 
1004  if (obj_ref == NULL) {
1005  pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
1006  xml_obj_ref->name, tag_id);
1007  continue;
1008  }
1009 
1010  if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
1011  return FALSE;
1012  }
1013  }
1014  }
1015 
1016  return TRUE;
1017 }
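/* Illustrative example (not part of this file): the <tags> section unpacked
 * above:
 *
 *   <tags>
 *     <tag id="my-tag">
 *       <obj_ref id="rsc1"/>
 *       <obj_ref id="rsc2"/>
 *     </tag>
 *   </tags>
 *
 * results in a scheduler->tags entry mapping "my-tag" to rsc1 and rsc2.
 */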
1018 
1019 /* The ticket state section:
1020  * "/cib/status/tickets/ticket_state" */
1021 static gboolean
1022 unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
1023 {
1024  const char *ticket_id = NULL;
1025  const char *granted = NULL;
1026  const char *last_granted = NULL;
1027  const char *standby = NULL;
1028  xmlAttrPtr xIter = NULL;
1029 
1030  pcmk_ticket_t *ticket = NULL;
1031 
1032  ticket_id = pcmk__xe_id(xml_ticket);
1033  if (pcmk__str_empty(ticket_id)) {
1034  return FALSE;
1035  }
1036 
1037  crm_trace("Processing ticket state for %s", ticket_id);
1038 
1039  ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
1040  if (ticket == NULL) {
1041  ticket = ticket_new(ticket_id, scheduler);
1042  if (ticket == NULL) {
1043  return FALSE;
1044  }
1045  }
1046 
1047  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1048  const char *prop_name = (const char *)xIter->name;
1049  const char *prop_value = pcmk__xml_attr_value(xIter);
1050 
1051  if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1052  continue;
1053  }
1054  pcmk__insert_dup(ticket->state, prop_name, prop_value);
1055  }
1056 
1057  granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1058  if (granted && crm_is_true(granted)) {
1059  ticket->granted = TRUE;
1060  crm_info("We have ticket '%s'", ticket->id);
1061  } else {
1062  ticket->granted = FALSE;
1063  crm_info("We do not have ticket '%s'", ticket->id);
1064  }
1065 
1066  last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1067  if (last_granted) {
1068  long long last_granted_ll = 0LL;
1069  int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1070 
1071  if (rc != pcmk_rc_ok) {
1072  crm_warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1073  " value '%s' in state for ticket %s: %s",
1074  last_granted_ll, last_granted, ticket->id,
1075  pcmk_rc_str(rc));
1076  }
1077  ticket->last_granted = (time_t) last_granted_ll;
1078  }
1079 
1080  standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1081  if (standby && crm_is_true(standby)) {
1082  ticket->standby = TRUE;
1083  if (ticket->granted) {
1084  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1085  }
1086  } else {
1087  ticket->standby = FALSE;
1088  }
1089 
1090  crm_trace("Done with ticket state for %s", ticket_id);
1091 
1092  return TRUE;
1093 }
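/* Illustrative example (not part of this file): a ticket state entry as
 * found under /cib/status/tickets:
 *
 *   <ticket_state id="ticketA" granted="true" last-granted="1700000000"/>
 *
 * marks ticketA as granted with last_granted set to that epoch timestamp;
 * adding standby="true" would additionally put the ticket in standby mode.
 */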
1094 
1095 static gboolean
1096 unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1097 {
1098  xmlNode *xml_obj = NULL;
1099 
1100  for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
1101  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
1102 
1103  if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) {
1104  continue;
1105  }
1106  unpack_ticket_state(xml_obj, scheduler);
1107  }
1108 
1109  return TRUE;
1110 }
1111 
1112 static void
1113 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1114  pcmk_scheduler_t *scheduler)
1115 {
1116  const char *discovery = NULL;
1117  const xmlNode *attrs = NULL;
1118  pcmk_resource_t *rsc = NULL;
1119 
1120  if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1121  return;
1122  }
1123 
1124  if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1125  return;
1126  }
1127  crm_trace("Processing Pacemaker Remote node %s",
1128  pcmk__node_name(this_node));
1129 
1130  pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
1131  &(this_node->details->remote_maintenance), 0);
1132 
1133  rsc = this_node->details->remote_rsc;
1134  if (this_node->details->remote_requires_reset == FALSE) {
1135  this_node->details->unclean = FALSE;
1136  this_node->details->unseen = FALSE;
1137  }
1138  attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
1139  NULL);
1140  add_node_attrs(attrs, this_node, TRUE, scheduler);
1141 
1142  if (pe__shutdown_requested(this_node)) {
1143  crm_info("%s is shutting down", pcmk__node_name(this_node));
1144  this_node->details->shutdown = TRUE;
1145  }
1146 
1147  if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1148  pcmk__rsc_node_current))) {
1149  crm_info("%s is in standby mode", pcmk__node_name(this_node));
1150  this_node->details->standby = TRUE;
1151  }
1152 
1153  if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1154  pcmk__rsc_node_current))
1155  || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1156  crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1157  this_node->details->maintenance = TRUE;
1158  }
1159 
1160  discovery = pcmk__node_attr(this_node,
1161  PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1162  NULL, pcmk__rsc_node_current);
1163  if ((discovery != NULL) && !crm_is_true(discovery)) {
1164  pcmk__warn_once(pcmk__wo_rdisc_enabled,
1165  "Support for the "
1166  PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1167  " node attribute is deprecated and will be removed"
1168  " (and behave as 'true') in a future release.");
1169 
1170  if (pcmk__is_remote_node(this_node)
1171  && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1172  pcmk__config_warn("Ignoring "
1173  PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1174  " attribute on Pacemaker Remote node %s"
1175  " because fencing is disabled",
1176  pcmk__node_name(this_node));
1177  } else {
1178  /* This is either a remote node with fencing enabled, or a guest
1179  * node. We don't care whether fencing is enabled when fencing guest
1180  * nodes, because they are "fenced" by recovering their containing
1181  * resource.
1182  */
1183  crm_info("%s has resource discovery disabled",
1184  pcmk__node_name(this_node));
1185  this_node->details->rsc_discovery_enabled = FALSE;
1186  }
1187  }
1188 }
1189 
1198 static void
1199 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1200  pcmk_scheduler_t *scheduler)
1201 {
1202  const char *discovery = NULL;
1203  const xmlNode *attrs = pcmk__xe_first_child(state,
1204  PCMK__XE_TRANSIENT_ATTRIBUTES,
1205  NULL, NULL);
1206 
1207  add_node_attrs(attrs, node, TRUE, scheduler);
1208 
1209  if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
1210  pcmk__rsc_node_current))) {
1211  crm_info("%s is in standby mode", pcmk__node_name(node));
1212  node->details->standby = TRUE;
1213  }
1214 
1215  if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
1216  pcmk__rsc_node_current))) {
1217  crm_info("%s is in maintenance mode", pcmk__node_name(node));
1218  node->details->maintenance = TRUE;
1219  }
1220 
1221  discovery = pcmk__node_attr(node,
1222  PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
1223  NULL, pcmk__rsc_node_current);
1224  if ((discovery != NULL) && !crm_is_true(discovery)) {
1225  pcmk__config_warn("Ignoring "
1226  PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
1227  " attribute for %s because disabling resource"
1228  " discovery is not allowed for cluster nodes",
1229  pcmk__node_name(node));
1230  }
1231 }
1232 
1245 static void
1246 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1247 {
1248  const char *id = NULL;
1249  const char *uname = NULL;
1250  pcmk_node_t *this_node = NULL;
1251 
1252  id = crm_element_value(state, PCMK_XA_ID);
1253  if (id == NULL) {
1254  pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1255  PCMK_XA_ID);
1256  crm_log_xml_info(state, "missing-id");
1257  return;
1258  }
1259 
1260  uname = crm_element_value(state, PCMK_XA_UNAME);
1261  if (uname == NULL) {
1262  /* If a joining peer makes the cluster acquire quorum from Corosync
1263  * before it has joined the CPG membership of pacemaker-controld,
1264  * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have
1265  * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and
1266  * wait for it to join CPG.
1267  */
1268  crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1269  "without " PCMK_XA_UNAME,
1270  id);
1271  }
1272 
1273  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1274  if (this_node == NULL) {
1275  crm_notice("Ignoring recorded state for removed node with name %s and "
1276  PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1277  return;
1278  }
1279 
1280  if (pcmk__is_pacemaker_remote_node(this_node)) {
1281  /* We can't determine the online status of Pacemaker Remote nodes until
1282  * after all resource history has been unpacked. In this first pass, we
1283  * do need to mark whether the node has been fenced, as this plays a
1284  * role during unpacking cluster node resource state.
1285  */
1286  pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
1287  &(this_node->details->remote_was_fenced), 0);
1288  return;
1289  }
1290 
1291  unpack_transient_attributes(state, this_node, scheduler);
1292 
1293  /* Provisionally mark this cluster node as clean. We have at least seen it
1294  * in the current cluster's lifetime.
1295  */
1296  this_node->details->unclean = FALSE;
1297  this_node->details->unseen = FALSE;
1298 
1299  crm_trace("Determining online status of cluster node %s (id %s)",
1300  pcmk__node_name(this_node), id);
1301  determine_online_status(state, this_node, scheduler);
1302 
1303  if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
1304  && this_node->details->online
1305  && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1306  /* Everything else should flow from this automatically
1307  * (at least until the scheduler becomes able to migrate off
1308  * healthy resources)
1309  */
1310  pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1311  FALSE);
1312  }
1313 }
1314 
1332 static int
1333 unpack_node_history(const xmlNode *status, bool fence,
1334  pcmk_scheduler_t *scheduler)
1335 {
1336  int rc = pcmk_rc_ok;
1337 
1338  // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1339  for (const xmlNode *state = pcmk__xe_first_child(status,
1340  PCMK__XE_NODE_STATE, NULL,
1341  NULL);
1342  state != NULL; state = pcmk__xe_next_same(state)) {
1343 
1344  const char *id = pcmk__xe_id(state);
1345  const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1346  pcmk_node_t *this_node = NULL;
1347 
1348  if ((id == NULL) || (uname == NULL)) {
1349  // Warning already logged in first pass through status section
1350  crm_trace("Not unpacking resource history from malformed "
1351  PCMK__XE_NODE_STATE " without id and/or uname");
1352  continue;
1353  }
1354 
1355  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1356  if (this_node == NULL) {
1357  // Warning already logged in first pass through status section
1358  crm_trace("Not unpacking resource history for node %s because "
1359  "no longer in configuration", id);
1360  continue;
1361  }
1362 
1363  if (this_node->details->unpacked) {
1364  crm_trace("Not unpacking resource history for node %s because "
1365  "already unpacked", id);
1366  continue;
1367  }
1368 
1369  if (fence) {
1370  // We're processing all remaining nodes
1371 
1372  } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1373  /* We can unpack a guest node's history only after we've unpacked
1374  * other resource history to the point that we know that the node's
1375  * connection and containing resource are both up.
1376  */
1377  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1378 
1379  if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1380  || (rsc->container->role != pcmk_role_started)) {
1381  crm_trace("Not unpacking resource history for guest node %s "
1382  "because container and connection are not known to "
1383  "be up", id);
1384  continue;
1385  }
1386 
1387  } else if (pcmk__is_remote_node(this_node)) {
1388  /* We can unpack a remote node's history only after we've unpacked
1389  * other resource history to the point that we know that the node's
1390  * connection is up, with the exception of when shutdown locks are
1391  * in use.
1392  */
1393  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1394 
1395  if ((rsc == NULL)
1396  || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
1397  && (rsc->role != pcmk_role_started))) {
1398  crm_trace("Not unpacking resource history for remote node %s "
1399  "because connection is not known to be up", id);
1400  continue;
1401  }
1402 
1403  /* If fencing and shutdown locks are disabled and we're not processing
1404  * unseen nodes, then we don't want to unpack offline nodes until online
1405  * nodes have been unpacked. This allows us to number active clone
1406  * instances first.
1407  */
1408  } else if (!pcmk_any_flags_set(scheduler->flags,
1409  pcmk_sched_fencing_enabled
1410  |pcmk_sched_shutdown_lock)
1411  && !this_node->details->online) {
1412  crm_trace("Not unpacking resource history for offline "
1413  "cluster node %s", id);
1414  continue;
1415  }
1416 
1417  if (pcmk__is_pacemaker_remote_node(this_node)) {
1418  determine_remote_online_status(scheduler, this_node);
1419  unpack_handle_remote_attrs(this_node, state, scheduler);
1420  }
1421 
1422  crm_trace("Unpacking resource history for %snode %s",
1423  (fence? "unseen " : ""), id);
1424 
1425  this_node->details->unpacked = TRUE;
1426  unpack_node_lrm(this_node, state, scheduler);
1427 
1428  rc = EAGAIN; // Other node histories might depend on this one
1429  }
1430  return rc;
1431 }
1432 
1433 /* remove nodes that are down, stopping */
1434 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1435 /* anything else? */
1436 gboolean
1437 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1438 {
1439  xmlNode *state = NULL;
1440 
1441  crm_trace("Beginning unpack");
1442 
1443  if (scheduler->tickets == NULL) {
1444  scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
1445  }
1446 
1447  for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1448  state = pcmk__xe_next(state)) {
1449 
1450  if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1451  unpack_tickets_state((xmlNode *) state, scheduler);
1452 
1453  } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1454  unpack_node_state(state, scheduler);
1455  }
1456  }
1457 
1458  while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1459  crm_trace("Another pass through node resource histories is needed");
1460  }
1461 
1462  // Now catch any nodes we didn't see
1463  unpack_node_history(status,
1464  pcmk_is_set(scheduler->flags,
1465  pcmk_sched_fencing_enabled),
1466  scheduler);
1467 
1468  /* Now that we know where resources are, we can schedule stops of containers
1469  * with failed bundle connections
1470  */
1471  if (scheduler->stop_needed != NULL) {
1472  for (GList *item = scheduler->stop_needed; item; item = item->next) {
1473  pcmk_resource_t *container = item->data;
1474  pcmk_node_t *node = pcmk__current_node(container);
1475 
1476  if (node) {
1477  stop_action(container, node, FALSE);
1478  }
1479  }
1480  g_list_free(scheduler->stop_needed);
1481  scheduler->stop_needed = NULL;
1482  }
1483 
1484  /* Now that we know status of all Pacemaker Remote connections and nodes,
1485  * we can stop connections for node shutdowns, and check the online status
1486  * of remote/guest nodes that didn't have any node history to unpack.
1487  */
1488  for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1489  pcmk_node_t *this_node = gIter->data;
1490 
1491  if (!pcmk__is_pacemaker_remote_node(this_node)) {
1492  continue;
1493  }
1494  if (this_node->details->shutdown
1495  && (this_node->details->remote_rsc != NULL)) {
1496  pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
1497  "remote shutdown");
1498  }
1499  if (!this_node->details->unpacked) {
1500  determine_remote_online_status(scheduler, this_node);
1501  }
1502  }
1503 
1504  return TRUE;
1505 }
1506 
1518 static long long
1519 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1520 {
1521  const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1522  int member = 0;
1523 
1524  if (member_time == NULL) {
1525  return -1LL;
1526 
1527  } else if (crm_str_to_boolean(member_time, &member) == 1) {
1528  /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1529  * recorded as a boolean for a DC < 2.1.7, or the node is pending
1530  * shutdown and has left the CPG, in which case it was set to 1 to avoid
1531  * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1532  *
1533  * We return the effective time for in_ccm=1 because what's important to
1534  * avoid fencing is that effective time minus this value is less than
1535  * the pending node timeout.
1536  */
1537  return member? (long long) get_effective_time(scheduler) : 0LL;
1538 
1539  } else {
1540  long long when_member = 0LL;
1541 
1542  if ((pcmk__scan_ll(member_time, &when_member,
1543  0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1544  crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1545  " in " PCMK__XE_NODE_STATE " entry", member_time);
1546  return -1LL;
1547  }
1548  return when_member;
1549  }
1550 }
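/* Illustrative values (not part of this file) as interpreted above:
 *
 *   in_ccm="false"       -> 0LL (not a member; boolean form from DC < 2.1.7)
 *   in_ccm="true"        -> current effective time (member, time unknown)
 *   in_ccm="1700000000"  -> 1700000000LL (member since that epoch time)
 *   in_ccm="bogus"       -> -1LL (unrecognized, logged with a warning)
 */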
1551 
1561 static long long
1562 unpack_node_online(const xmlNode *node_state)
1563 {
1564  const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1565 
1566  // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1567  if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1568  pcmk__str_casei|pcmk__str_null_matches)) {
1569  return 0LL;
1570 
1571  } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1572  return 1LL;
1573 
1574  } else {
1575  long long when_online = 0LL;
1576 
1577  if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1578  || (when_online < 0)) {
1579  crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1580  PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1581  return 0LL;
1582  }
1583  return when_online;
1584  }
1585 }
1586 
1596 static bool
1597 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1598 {
1599  long long value = 0LL;
1600  int value_i = 0;
1601  int rc = pcmk_rc_ok;
1602  const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1603  NULL, pcmk__rsc_node_current);
1604 
1605  // Value may be boolean or an epoch time
1606  if (crm_str_to_boolean(value_s, &value_i) == 1) {
1607  return (value_i != 0);
1608  }
1609  rc = pcmk__scan_ll(value_s, &value, 0LL);
1610  if (rc == pcmk_rc_ok) {
1611  return (value > 0);
1612  }
1613  crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1614  " node attribute for %s: %s",
1615  value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1616  return false;
1617 }
1618 
1619 static gboolean
1620 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1621  const xmlNode *node_state,
1622  pcmk_node_t *this_node)
1623 {
1624  gboolean online = FALSE;
1625  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1626  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1627  long long when_member = unpack_node_member(node_state, scheduler);
1628  long long when_online = unpack_node_online(node_state);
1629 
1630  if (when_member <= 0) {
1631  crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1632  ((when_member < 0)? "presumed " : ""));
1633 
1634  } else if (when_online > 0) {
1635  if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1636  online = TRUE;
1637  } else {
1638  crm_debug("Node %s is not ready to run resources: %s",
1639  pcmk__node_name(this_node), join);
1640  }
1641 
1642  } else if (this_node->details->expected_up == FALSE) {
1643  crm_trace("Node %s controller is down: "
1644  "member@%lld online@%lld join=%s expected=%s",
1645  pcmk__node_name(this_node), when_member, when_online,
1646  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1647 
1648  } else {
1649  /* mark it unclean */
1650  pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1651  crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1652  pcmk__node_name(this_node), when_member, when_online,
1653  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1654  }
1655  return online;
1656 }
1657 
1671 static inline bool
1672 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1673  long long when_member, long long when_online)
1674 {
1675  if ((scheduler->node_pending_timeout > 0)
1676  && (when_member > 0) && (when_online <= 0)) {
1677  // There is a timeout on pending nodes, and node is pending
1678 
1679  time_t timeout = when_member + scheduler->node_pending_timeout;
1680 
1681  if (get_effective_time(node->details->data_set) >= timeout) {
1682  return true; // Node has timed out
1683  }
1684 
1685  // Node is pending, but still has time
1686  pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1687  }
1688  return false;
1689 }
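/* Illustrative arithmetic (not part of this file): with
 * node_pending_timeout=120 and a node that became a member at
 * when_member=1000 but never joined CPG (when_online=0), pending_too_long()
 * returns true once the effective time reaches 1120; before that, it
 * schedules a cluster recheck for that moment and returns false.
 */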
1690 
1691 static bool
1692 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1693  const xmlNode *node_state,
1694  pcmk_node_t *this_node)
1695 {
1696  bool termination_requested = unpack_node_terminate(this_node, node_state);
1697  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1698  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1699  long long when_member = unpack_node_member(node_state, scheduler);
1700  long long when_online = unpack_node_online(node_state);
1701 
1702 /*
1703  - PCMK__XA_JOIN ::= member|down|pending|banned
1704  - PCMK_XA_EXPECTED ::= member|down
1705 
1706  @COMPAT with entries recorded for DCs < 2.1.7
1707  - PCMK__XA_IN_CCM ::= true|false
1708  - PCMK_XA_CRMD ::= online|offline
1709 
1710  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1711  - PCMK__XA_IN_CCM ::= <timestamp>|0
1712  Since when the node has been a cluster member. A value of 0 means the node
1713  is not a cluster member.
1714 
1715  - PCMK_XA_CRMD ::= <timestamp>|0
1716  Since when the peer has been online in CPG. A value of 0 means the peer is
1717  offline in CPG.
1718 */
1719 
1720  crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1721  pcmk__node_name(this_node), when_member, when_online,
1722  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1723  (termination_requested? " (termination requested)" : ""));
1724 
1725  if (this_node->details->shutdown) {
1726  crm_debug("%s is shutting down", pcmk__node_name(this_node));
1727 
1728  /* Slightly different criteria since we can't shut down a dead peer */
1729  return (when_online > 0);
1730  }
1731 
1732  if (when_member < 0) {
1733  pe_fence_node(scheduler, this_node,
1734  "peer has not been seen by the cluster", FALSE);
1735  return false;
1736  }
1737 
1738  if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1739  pe_fence_node(scheduler, this_node,
1740  "peer failed Pacemaker membership criteria", FALSE);
1741 
1742  } else if (termination_requested) {
1743  if ((when_member <= 0) && (when_online <= 0)
1744  && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1745  crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1746  return false;
1747  }
1748  pe_fence_node(scheduler, this_node, "fencing was requested", false);
1749 
1750  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1751  pcmk__str_null_matches)) {
1752 
1753  if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1754  pe_fence_node(scheduler, this_node,
1755  "peer pending timed out on joining the process group",
1756  FALSE);
1757 
1758  } else if ((when_member > 0) || (when_online > 0)) {
1759  crm_info("- %s is not ready to run resources",
1760  pcmk__node_name(this_node));
1761  this_node->details->standby = TRUE;
1762  this_node->details->pending = TRUE;
1763 
1764  } else {
1765  crm_trace("%s is down or still coming up",
1766  pcmk__node_name(this_node));
1767  }
1768 
1769  } else if (when_member <= 0) {
1770  // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1771  pe_fence_node(scheduler, this_node,
1772  "peer is no longer part of the cluster", TRUE);
1773 
1774  } else if (when_online <= 0) {
1775  pe_fence_node(scheduler, this_node,
1776  "peer process is no longer available", FALSE);
1777 
1778  /* Everything is running at this point, now check join state */
1779 
1780  } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1781  crm_info("%s is active", pcmk__node_name(this_node));
1782 
1783  } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1784  CRMD_JOINSTATE_DOWN, NULL)) {
1785  crm_info("%s is not ready to run resources",
1786  pcmk__node_name(this_node));
1787  this_node->details->standby = TRUE;
1788  this_node->details->pending = TRUE;
1789 
1790  } else {
1791  pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1792  FALSE);
1793  }
1794 
1795  return (when_member > 0);
1796 }
1797 
1798 static void
1799 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1800  pcmk_node_t *this_node)
1801 {
1802  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1803  pcmk_resource_t *container = NULL;
1804  pcmk_node_t *host = NULL;
1805 
1806  /* If there is a node state entry for a (former) Pacemaker Remote node
1807  * but no resource creating that node, the node's connection resource will
1808  * be NULL. Consider it an offline remote node in that case.
1809  */
1810  if (rsc == NULL) {
1811  this_node->details->online = FALSE;
1812  goto remote_online_done;
1813  }
1814 
1815  container = rsc->container;
1816 
1817  if (container && pcmk__list_of_1(rsc->running_on)) {
1818  host = rsc->running_on->data;
1819  }
1820 
1821  /* If the resource is currently started, mark it online. */
1822  if (rsc->role == pcmk_role_started) {
1823  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1824  (container? "Guest" : "Remote"), this_node->details->id);
1825  this_node->details->online = TRUE;
1826  }
1827 
1828  /* consider this node shutting down if transitioning start->stop */
1829  if ((rsc->role == pcmk_role_started)
1830  && (rsc->next_role == pcmk_role_stopped)) {
1831 
1832  crm_trace("%s node %s shutting down because connection resource is stopping",
1833  (container? "Guest" : "Remote"), this_node->details->id);
1834  this_node->details->shutdown = TRUE;
1835  }
1836 
1837  /* Now check all the failure conditions. */
1838  if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1839  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1840  this_node->details->id);
1841  this_node->details->online = FALSE;
1842  this_node->details->remote_requires_reset = TRUE;
1843 
1844  } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1845  crm_trace("%s node %s OFFLINE because connection resource failed",
1846  (container? "Guest" : "Remote"), this_node->details->id);
1847  this_node->details->online = FALSE;
1848 
1849  } else if ((rsc->role == pcmk_role_stopped)
1850  || ((container != NULL)
1851  && (container->role == pcmk_role_stopped))) {
1852 
1853  crm_trace("%s node %s OFFLINE because its resource is stopped",
1854  (container? "Guest" : "Remote"), this_node->details->id);
1855  this_node->details->online = FALSE;
1856  this_node->details->remote_requires_reset = FALSE;
1857 
1858  } else if (host && (host->details->online == FALSE)
1859  && host->details->unclean) {
1860  crm_trace("Guest node %s UNCLEAN because host is unclean",
1861  this_node->details->id);
1862  this_node->details->online = FALSE;
1863  this_node->details->remote_requires_reset = TRUE;
1864  }
1865 
1866 remote_online_done:
1867  crm_trace("Remote node %s online=%s",
1868  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1869 }
1870 
1871 static void
1872 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1873  pcmk_scheduler_t *scheduler)
1874 {
1875  gboolean online = FALSE;
1876  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1877 
1878  CRM_CHECK(this_node != NULL, return);
1879 
1880  this_node->details->shutdown = FALSE;
1881  this_node->details->expected_up = FALSE;
1882 
1883  if (pe__shutdown_requested(this_node)) {
1884  this_node->details->shutdown = TRUE;
1885 
1886  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1887  this_node->details->expected_up = TRUE;
1888  }
1889 
1890  if (this_node->details->type == node_ping) {
1891  this_node->details->unclean = FALSE;
1892  online = FALSE; /* As far as resource management is concerned,
1893  * the node is safely offline.
1894  * Anyone caught abusing this logic will be shot
1895  */
1896 
1897  } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1898  online = determine_online_status_no_fencing(scheduler, node_state,
1899  this_node);
1900 
1901  } else {
1902  online = determine_online_status_fencing(scheduler, node_state,
1903  this_node);
1904  }
1905 
1906  if (online) {
1907  this_node->details->online = TRUE;
1908 
1909  } else {
1910  /* remove node from contention */
1911  this_node->fixed = TRUE; // @COMPAT deprecated and unused
1912  this_node->weight = -PCMK_SCORE_INFINITY;
1913  }
1914 
1915  if (online && this_node->details->shutdown) {
1916  /* don't run resources here */
1917  this_node->fixed = TRUE; // @COMPAT deprecated and unused
1918  this_node->weight = -PCMK_SCORE_INFINITY;
1919  }
1920 
1921  if (this_node->details->type == node_ping) {
1922  crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
1923 
1924  } else if (this_node->details->unclean) {
1925  pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
1926 
1927  } else if (this_node->details->online) {
1928  crm_info("%s is %s", pcmk__node_name(this_node),
1929  this_node->details->shutdown ? "shutting down" :
1930  this_node->details->pending ? "pending" :
1931  this_node->details->standby ? "standby" :
1932  this_node->details->maintenance ? "maintenance" : "online");
1933 
1934  } else {
1935  crm_trace("%s is offline", pcmk__node_name(this_node));
1936  }
1937 }
1938 
1947 const char *
1948 pe_base_name_end(const char *id)
1949 {
1950  if (!pcmk__str_empty(id)) {
1951  const char *end = id + strlen(id) - 1;
1952 
1953  for (const char *s = end; s > id; --s) {
1954  switch (*s) {
1955  case '0':
1956  case '1':
1957  case '2':
1958  case '3':
1959  case '4':
1960  case '5':
1961  case '6':
1962  case '7':
1963  case '8':
1964  case '9':
1965  break;
1966  case ':':
1967  return (s == end)? s : (s - 1);
1968  default:
1969  return end;
1970  }
1971  }
1972  return end;
1973  }
1974  return NULL;
1975 }
1976 
1987 char *
1988 clone_strip(const char *last_rsc_id)
1989 {
1990  const char *end = pe_base_name_end(last_rsc_id);
1991  char *basename = NULL;
1992 
1993  pcmk__assert(end != NULL);
1994  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1995  pcmk__assert(basename != NULL);
1996  return basename;
1997 }
1998 
2009 char *
2010 clone_zero(const char *last_rsc_id)
2011 {
2012  const char *end = pe_base_name_end(last_rsc_id);
2013  size_t base_name_len = end - last_rsc_id + 1;
2014  char *zero = NULL;
2015 
2016  pcmk__assert(end != NULL);
2017  zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
2018  memcpy(zero, last_rsc_id, base_name_len);
2019  zero[base_name_len] = ':';
2020  zero[base_name_len + 1] = '0';
2021  return zero;
2022 }
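/* Editorial illustration (not part of unpack.c): expected results of the
 * clone-name helpers above, using a hypothetical resource "myrsc" with the
 * usual ":<instance>" suffix convention:
 *
 *   clone_strip("myrsc:1");  // -> "myrsc"
 *   clone_zero("myrsc:1");   // -> "myrsc:0"
 *   clone_zero("myrsc");     // -> "myrsc:0"
 */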
2023 
2024 static pcmk_resource_t *
2025 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
2026  pcmk_scheduler_t *scheduler)
2027 {
2028  pcmk_resource_t *rsc = NULL;
2029  xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
2030 
2031  pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
2032  crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
2033  crm_log_xml_debug(xml_rsc, "Orphan resource");
2034 
2035  if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
2036  return NULL;
2037  }
2038 
2039  if (xml_contains_remote_node(xml_rsc)) {
2040  pcmk_node_t *node;
2041 
2042  crm_debug("Detected orphaned remote node %s", rsc_id);
2043  node = pcmk_find_node(scheduler, rsc_id);
2044  if (node == NULL) {
2045  node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2046  scheduler);
2047  }
2048  link_rsc2remotenode(scheduler, rsc);
2049 
2050  if (node) {
2051  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2052  node->details->shutdown = TRUE;
2053  }
2054  }
2055 
2056  if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2057  /* This orphaned rsc needs to be mapped to a container. */
2058  crm_trace("Detected orphaned container filler %s", rsc_id);
2059  pcmk__set_rsc_flags(rsc, pcmk_rsc_removed_filler);
2060  }
2061 
2062  scheduler->resources = g_list_append(scheduler->resources, rsc);
2063  return rsc;
2064 }
2065 
2077 static pcmk_resource_t *
2078 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2079  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2080 {
2081  pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
2082 
2083  // find_rsc() because we might be a cloned group
2084  pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
2085  pcmk_rsc_match_clone_only);
2086 
2087  pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2088  top->id, parent->id, rsc_id, pcmk__node_name(node));
2089  return orphan;
2090 }
2091 
2107 static pcmk_resource_t *
2108 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2109  pcmk_resource_t *parent, const char *rsc_id)
2110 {
2111  GList *rIter = NULL;
2112  pcmk_resource_t *rsc = NULL;
2113  pcmk_resource_t *inactive_instance = NULL;
2114  gboolean skip_inactive = FALSE;
2115 
2116  pcmk__assert(pcmk__is_anonymous_clone(parent));
2117 
2118  // Check for active (or partially active, for cloned groups) instance
2119  pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2120  rsc_id, pcmk__node_name(node), parent->id);
2121  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2122  GList *locations = NULL;
2123  pcmk_resource_t *child = rIter->data;
2124 
2125  /* Check whether this instance is already known to be active or pending
2126  * anywhere, at this stage of unpacking. Because this function is called
2127  * for a resource before the resource's individual operation history
2128  * entries are unpacked, locations will generally not contain the
2129  * desired node.
2130  *
2131  * However, there are three exceptions:
2132  * (1) when child is a cloned group and we have already unpacked the
2133  * history of another member of the group on the same node;
2134  * (2) when we've already unpacked the history of another numbered
2135  * instance on the same node (which can happen if
2136  * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2137  * (3) when we re-run calculations on the same scheduler data as part of
2138  * a simulation.
2139  */
2140  child->fns->location(child, &locations, 2);
2141  if (locations) {
2142  /* We should never associate the same numbered anonymous clone
2143  * instance with multiple nodes, and clone instances can't migrate,
2144  * so there must be only one location, regardless of history.
2145  */
2146  CRM_LOG_ASSERT(locations->next == NULL);
2147 
2148  if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2149  /* This child instance is active on the requested node, so check
2150  * for a corresponding configured resource. We use find_rsc()
2151  * instead of child because child may be a cloned group, and we
2152  * need the particular member corresponding to rsc_id.
2153  *
2154  * If the history entry is orphaned, rsc will be NULL.
2155  */
2156  rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2157  pcmk_rsc_match_clone_only);
2158  if (rsc) {
2159  /* If there are multiple instance history entries for an
2160  * anonymous clone in a single node's history (which can
2161  * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2162  * to false), we want to consider the instances beyond the
2163  * first as orphans, even if there are inactive instance
2164  * numbers available.
2165  */
2166  if (rsc->running_on) {
2167  crm_notice("Active (now-)anonymous clone %s has "
2168  "multiple (orphan) instance histories on %s",
2169  parent->id, pcmk__node_name(node));
2170  skip_inactive = TRUE;
2171  rsc = NULL;
2172  } else {
2173  pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2174  }
2175  }
2176  }
2177  g_list_free(locations);
2178 
2179  } else {
2180  pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2181  if (!skip_inactive && !inactive_instance
2182  && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2183  // Remember one inactive instance in case we don't find active
2184  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2185  pcmk_rsc_match_clone_only);
2186 
2187  /* ... but don't use it if it was already associated with a
2188  * pending action on another node
2189  */
2190  if ((inactive_instance != NULL) &&
2191  (inactive_instance->pending_node != NULL) &&
2192  !pcmk__same_node(inactive_instance->pending_node, node)) {
2193  inactive_instance = NULL;
2194  }
2195  }
2196  }
2197  }
2198 
2199  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2200  pcmk__rsc_trace(parent, "Resource %s, empty slot",
2201  inactive_instance->id);
2202  rsc = inactive_instance;
2203  }
2204 
2205  /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2206  * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2207  * don't want to consume a valid instance number for unclean nodes. Such
2208  * instances may appear to be active according to the history, but should be
2209  * considered inactive, so we can start an instance elsewhere. Treat such
2210  * instances as orphans.
2211  *
2212  * An exception is instances running on guest nodes -- since guest node
2213  * "fencing" is actually just a resource stop, "requires" shouldn't apply.
2214  *
2215  * @TODO Ideally, we'd use an inactive instance number if it is not needed
2216  * for any clean instances. However, we don't know that at this point.
2217  */
2218  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2219  && (!node->details->online || node->details->unclean)
2220  && !pcmk__is_guest_or_bundle_node(node)
2221  && !pe__is_universal_clone(parent, scheduler)) {
2222 
2223  rsc = NULL;
2224  }
2225 
2226  if (rsc == NULL) {
2227  rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2228  pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2229  }
2230  return rsc;
2231 }
2232 
2233 static pcmk_resource_t *
2234 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2235  const char *rsc_id)
2236 {
2237  pcmk_resource_t *rsc = NULL;
2238  pcmk_resource_t *parent = NULL;
2239 
2240  crm_trace("looking for %s", rsc_id);
2241  rsc = pe_find_resource(scheduler->resources, rsc_id);
2242 
2243  if (rsc == NULL) {
2244  /* If we didn't find the resource by its name in the operation history,
2245  * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2246  * we create a single :0 orphan to match against here.
2247  */
2248  char *clone0_id = clone_zero(rsc_id);
2249  pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2250  clone0_id);
2251 
2252  if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2253  rsc = clone0;
2254  parent = uber_parent(clone0);
2255  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2256  } else {
2257  crm_trace("%s is not known as %s either (orphan)",
2258  rsc_id, clone0_id);
2259  }
2260  free(clone0_id);
2261 
2262  } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2263  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2264  rsc_id);
2265  return NULL;
2266 
2267  } else {
2268  parent = uber_parent(rsc);
2269  }
2270 
2271  if (pcmk__is_anonymous_clone(parent)) {
2272 
2273  if (pcmk__is_bundled(parent)) {
2274  rsc = pe__find_bundle_replica(parent->parent, node);
2275  } else {
2276  char *base = clone_strip(rsc_id);
2277 
2278  rsc = find_anonymous_clone(scheduler, node, parent, base);
2279  free(base);
2280  pcmk__assert(rsc != NULL);
2281  }
2282  }
2283 
2284  if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2285  && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_none)) {
2286 
2287  pcmk__str_update(&rsc->clone_name, rsc_id);
2288  pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2289  rsc_id, pcmk__node_name(node), rsc->id,
2290  pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
2291  }
2292  return rsc;
2293 }
2294 
2295 static pcmk_resource_t *
2296 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2297  pcmk_scheduler_t *scheduler)
2298 {
2299  pcmk_resource_t *rsc = NULL;
2300  const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2301 
2302  crm_debug("Detected orphan resource %s on %s",
2303  rsc_id, pcmk__node_name(node));
2304  rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2305  if (rsc == NULL) {
2306  return NULL;
2307  }
2308 
2309  if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
2310  pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2311 
2312  } else {
2313  CRM_CHECK(rsc != NULL, return NULL);
2314  pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2315  resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
2316  "__orphan_do_not_run__", scheduler);
2317  }
2318  return rsc;
2319 }
2320 
2321 static void
2322 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2323  enum action_fail_response on_fail)
2324 {
2325  pcmk_node_t *tmpnode = NULL;
2326  char *reason = NULL;
2327  enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2328 
2329  pcmk__assert(rsc);
2330  pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2331  rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
2332  pcmk_on_fail_text(on_fail));
2333 
2334  /* process current state */
2335  if (rsc->role != pcmk_role_unknown) {
2336  pcmk_resource_t *iter = rsc;
2337 
2338  while (iter) {
2339  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2340  pcmk_node_t *n = pe__copy_node(node);
2341 
2342  pcmk__rsc_trace(rsc, "%s%s%s known on %s",
2343  rsc->id,
2344  ((rsc->clone_name == NULL)? "" : " also known as "),
2345  ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2346  pcmk__node_name(n));
2347  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2348  }
2349  if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2350  break;
2351  }
2352  iter = iter->parent;
2353  }
2354  }
2355 
2356  /* If a managed resource is believed to be running, but node is down ... */
2357  if ((rsc->role > pcmk_role_stopped)
2358  && node->details->online == FALSE
2359  && node->details->maintenance == FALSE
2360  && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2361 
2362  gboolean should_fence = FALSE;
2363 
2364  /* If this is a guest node, fence it (regardless of whether fencing is
2365  * enabled, because guest node fencing is done by recovery of the
2366  * container resource rather than by the fencer). Mark the resource
2367  * we're processing as failed. When the guest comes back up, its
2368  * operation history in the CIB will be cleared, freeing the affected
2369  * resource to run again once we are sure we know its state.
2370  */
2371  if (pcmk__is_guest_or_bundle_node(node)) {
2372  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
2373  should_fence = TRUE;
2374 
2375  } else if (pcmk_is_set(rsc->cluster->flags,
2376  pcmk_sched_fencing_enabled)) {
2377  if (pcmk__is_remote_node(node)
2378  && (node->details->remote_rsc != NULL)
2379  && !pcmk_is_set(node->details->remote_rsc->flags,
2380  pcmk_rsc_failed)) {
2381 
2382  /* Setting unseen means that fencing of the remote node will
2383  * occur only if the connection resource is not going to start
2384  * somewhere. This allows connection resources on a failed
2385  * cluster node to move to another node without requiring the
2386  * remote nodes to be fenced as well.
2387  */
2388  node->details->unseen = TRUE;
2389  reason = crm_strdup_printf("%s is active there (fencing will be"
2390  " revoked if remote connection can "
2391  "be re-established elsewhere)",
2392  rsc->id);
2393  }
2394  should_fence = TRUE;
2395  }
2396 
2397  if (should_fence) {
2398  if (reason == NULL) {
2399  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2400  }
2401  pe_fence_node(rsc->cluster, node, reason, FALSE);
2402  }
2403  free(reason);
2404  }
2405 
2406  /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2407  save_on_fail = on_fail;
2408 
2409  if (node->details->unclean) {
2410  /* No extra processing needed
2411  * Also allows resources to be started again after a node is shot
2412  */
2413  on_fail = pcmk_on_fail_ignore;
2414  }
2415 
2416  switch (on_fail) {
2417  case pcmk_on_fail_ignore:
2418  /* nothing to do */
2419  break;
2420 
2421  case pcmk_on_fail_demote:
2422  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2423  demote_action(rsc, node, FALSE);
2424  break;
2425 
2426  case pcmk_on_fail_fence_node:
2427  /* treat it as if it is still running
2428  * but also mark the node as unclean
2429  */
2430  reason = crm_strdup_printf("%s failed there", rsc->id);
2431  pe_fence_node(rsc->cluster, node, reason, FALSE);
2432  free(reason);
2433  break;
2434 
2435  case pcmk_on_fail_standby_node:
2436  node->details->standby = TRUE;
2437  node->details->standby_onfail = TRUE;
2438  break;
2439 
2440  case pcmk_on_fail_block:
2441  /* is_managed == FALSE will prevent any
2442  * actions from being sent for the resource
2443  */
2444  pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
2445  pcmk__set_rsc_flags(rsc, pcmk_rsc_blocked);
2446  break;
2447 
2448  case pcmk_on_fail_ban:
2449  /* make sure it comes up somewhere else
2450  * or not at all
2451  */
2452  resource_location(rsc, node, -PCMK_SCORE_INFINITY,
2453  "__action_migration_auto__", rsc->cluster);
2454  break;
2455 
2456  case pcmk_on_fail_stop:
2457  pe__set_next_role(rsc, pcmk_role_stopped,
2458  PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
2459  break;
2460 
2461  case pcmk_on_fail_restart:
2462  if ((rsc->role != pcmk_role_stopped)
2463  && (rsc->role != pcmk_role_unknown)) {
2464  pcmk__set_rsc_flags(rsc,
2465  pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
2466  stop_action(rsc, node, FALSE);
2467  }
2468  break;
2469 
2470  case pcmk_on_fail_restart_container:
2471  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
2472  if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
2473  /* A bundle's remote connection can run on a different node than
2474  * the bundle's container. We don't necessarily know where the
2475  * container is running yet, so remember it and add a stop
2476  * action for it later.
2477  */
2478  rsc->cluster->stop_needed =
2479  g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2480  } else if (rsc->container) {
2481  stop_action(rsc->container, node, FALSE);
2482  } else if ((rsc->role != pcmk_role_stopped)
2483  && (rsc->role != pcmk_role_unknown)) {
2484  stop_action(rsc, node, FALSE);
2485  }
2486  break;
2487 
2488  case pcmk_on_fail_reset_remote:
2489  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
2490  if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
2491  tmpnode = NULL;
2492  if (rsc->is_remote_node) {
2493  tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2494  }
2495  if (pcmk__is_remote_node(tmpnode)
2496  && !(tmpnode->details->remote_was_fenced)) {
2497  /* The remote connection resource failed in a way that
2498  * should result in fencing the remote node.
2499  */
2500  pe_fence_node(rsc->cluster, tmpnode,
2501  "remote connection is unrecoverable", FALSE);
2502  }
2503  }
2504 
2505  /* Require the stop action regardless of whether fencing is occurring. */
2506  if (rsc->role > pcmk_role_stopped) {
2507  stop_action(rsc, node, FALSE);
2508  }
2509 
2510  /* if reconnect delay is in use, prevent the connection from exiting the
2511  * "STOPPED" role until the failure is cleared by the delay timeout. */
2512  if (rsc->remote_reconnect_ms) {
2513  pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2514  }
2515  break;
2516  }
2517 
2518  /* Ensure a remote node connection failure forces an unclean remote node
2519  * to be fenced. By setting unseen = FALSE, the remote node failure will
2520  * result in a fencing operation regardless of whether we're going to
2521  * attempt to reconnect to the remote node in this transition. */
2522  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2523  tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
2524  if (tmpnode && tmpnode->details->unclean) {
2525  tmpnode->details->unseen = FALSE;
2526  }
2527  }
2528 
2529  if ((rsc->role != pcmk_role_stopped)
2530  && (rsc->role != pcmk_role_unknown)) {
2531  if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2532  if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2533  crm_notice("Removed resource %s is active on %s and will be "
2534  "stopped when possible",
2535  rsc->id, pcmk__node_name(node));
2536  } else {
2537  crm_notice("Removed resource %s must be stopped manually on %s "
2538  "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
2539  " is set to false", rsc->id, pcmk__node_name(node));
2540  }
2541  }
2542 
2543  native_add_running(rsc, node, rsc->cluster,
2544  (save_on_fail != pcmk_on_fail_ignore));
2545  switch (on_fail) {
2546  case pcmk_on_fail_ignore:
2547  break;
2548  case pcmk_on_fail_demote:
2549  case pcmk_on_fail_block:
2550  pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
2551  break;
2552  default:
2553  pcmk__set_rsc_flags(rsc,
2554  pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
2555  break;
2556  }
2557 
2558  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2559  /* Only do this for older status sections that included instance numbers.
2560  * Otherwise, stopped instances will appear as orphans.
2561  */
2562  pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
2563  rsc->clone_name, rsc->id);
2564  free(rsc->clone_name);
2565  rsc->clone_name = NULL;
2566 
2567  } else {
2568  GList *possible_matches = pe__resource_actions(rsc, node,
2569  PCMK_ACTION_STOP, FALSE);
2570  GList *gIter = possible_matches;
2571 
2572  for (; gIter != NULL; gIter = gIter->next) {
2573  pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2574 
2575  pcmk__set_action_flags(stop, pcmk_action_optional);
2576  }
2577 
2578  g_list_free(possible_matches);
2579  }
2580 
2581  /* A successful stop after migrate_to on the migration source doesn't make
2582  * the partially migrated resource stopped on the migration target.
2583  */
2584  if ((rsc->role == pcmk_role_stopped)
2585  && rsc->partial_migration_source
2586  && rsc->partial_migration_source->details == node->details
2587  && rsc->partial_migration_target
2588  && rsc->running_on) {
2589 
2590  rsc->role = pcmk_role_started;
2591  }
2592 }
2593 
2594 /* create active recurring operations as optional */
2595 static void
2596 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2597  int start_index, int stop_index,
2598  GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2599 {
2600  int counter = -1;
2601  const char *task = NULL;
2602  const char *status = NULL;
2603  GList *gIter = sorted_op_list;
2604 
2605  pcmk__assert(rsc != NULL);
2606  pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2607  rsc->id, start_index, stop_index);
2608 
2609  for (; gIter != NULL; gIter = gIter->next) {
2610  xmlNode *rsc_op = (xmlNode *) gIter->data;
2611 
2612  guint interval_ms = 0;
2613  char *key = NULL;
2614  const char *id = pcmk__xe_id(rsc_op);
2615 
2616  counter++;
2617 
2618  if (node->details->online == FALSE) {
2619  pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2620  rsc->id, pcmk__node_name(node));
2621  break;
2622 
2623  /* Need to check if there's a monitor for role="Stopped" */
2624  } else if (start_index < stop_index && counter <= stop_index) {
2625  pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2626  id, pcmk__node_name(node));
2627  continue;
2628 
2629  } else if (counter < start_index) {
2630  pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2631  id, pcmk__node_name(node), counter);
2632  continue;
2633  }
2634 
2635  crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2636  if (interval_ms == 0) {
2637  pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2638  id, pcmk__node_name(node));
2639  continue;
2640  }
2641 
2642  status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2643  if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2644  pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2645  id, pcmk__node_name(node));
2646  continue;
2647  }
2648  task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2649  /* create the action */
2650  key = pcmk__op_key(rsc->id, task, interval_ms);
2651  pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2652  custom_action(rsc, key, task, node, TRUE, scheduler);
2653  }
2654 }
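/* Editorial illustration (not part of unpack.c), using a hypothetical
 * resource "myrsc": a recurring 10-second monitor found in the history is
 * re-created above via custom_action() with the key
 * pcmk__op_key("myrsc", "monitor", 10000) ("myrsc_monitor_10000") and
 * optional=TRUE, so the scheduler retains the monitor without scheduling
 * it as new work.
 */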
2655 
2656 void
2657 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2658  int *stop_index)
2659 {
2660  int counter = -1;
2661  int implied_monitor_start = -1;
2662  int implied_clone_start = -1;
2663  const char *task = NULL;
2664  const char *status = NULL;
2665 
2666  *stop_index = -1;
2667  *start_index = -1;
2668 
2669  for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2670  const xmlNode *rsc_op = (const xmlNode *) iter->data;
2671 
2672  counter++;
2673 
2674  task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2675  status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2676 
2677  if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2678  && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2679  *stop_index = counter;
2680 
2681  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2682  PCMK_ACTION_MIGRATE_FROM, NULL)) {
2683  *start_index = counter;
2684 
2685  } else if ((implied_monitor_start <= *stop_index)
2686  && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2687  pcmk__str_casei)) {
2688  const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2689 
2690  if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2691  implied_monitor_start = counter;
2692  }
2693  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2694  PCMK_ACTION_DEMOTE, NULL)) {
2695  implied_clone_start = counter;
2696  }
2697  }
2698 
2699  if (*start_index == -1) {
2700  if (implied_clone_start != -1) {
2701  *start_index = implied_clone_start;
2702  } else if (implied_monitor_start != -1) {
2703  *start_index = implied_monitor_start;
2704  }
2705  }
2706 }
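/* Editorial illustration (not part of unpack.c): given a call-id-sorted
 * history of start (index 0), monitor (index 1), stop (index 2) where the
 * stop has op-status 0, calculate_active_ops() leaves *start_index at 0
 * and *stop_index at 2, so the resource is treated as inactive. When no
 * explicit start is present, a probe that found the resource running
 * (rc 0 or 8) supplies the implied start index instead.
 */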
2707 
2708 // If resource history entry has shutdown lock, remember lock node and time
2709 static void
2710 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2711  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2712 {
2713  time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2714 
2715  if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
2716  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2717 
2718  if ((scheduler->shutdown_lock > 0)
2719  && (get_effective_time(scheduler)
2720  > (lock_time + scheduler->shutdown_lock))) {
2721  pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2722  rsc->id, pcmk__node_name(node));
2723  pe__clear_resource_history(rsc, node);
2724  } else {
2725  /* @COMPAT I don't like breaking const signatures, but
2726  * rsc->lock_node should really be const -- we just can't change it
2727  * until the next API compatibility break.
2728  */
2729  rsc->lock_node = (pcmk_node_t *) node;
2730  rsc->lock_time = lock_time;
2731  }
2732  }
2733 }
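/* Editorial illustration (not part of unpack.c), assuming
 * scheduler->shutdown_lock holds the configured lock limit in seconds:
 * with a 600-second limit and a node shutdown (lock) time of t=1000, the
 * resource stays locked to the node until the effective time passes
 * t=1600; after that, unpack_shutdown_lock() clears the resource's
 * history on the node instead of recording the lock.
 */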
2734 
2745 static pcmk_resource_t *
2746 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2747  pcmk_scheduler_t *scheduler)
2748 {
2749  GList *gIter = NULL;
2750  int stop_index = -1;
2751  int start_index = -1;
2752  enum rsc_role_e req_role = pcmk_role_unknown;
2753 
2754  const char *rsc_id = pcmk__xe_id(lrm_resource);
2755 
2756  pcmk_resource_t *rsc = NULL;
2757  GList *op_list = NULL;
2758  GList *sorted_op_list = NULL;
2759 
2760  xmlNode *rsc_op = NULL;
2761  xmlNode *last_failure = NULL;
2762 
2763  enum action_fail_response on_fail = pcmk_on_fail_ignore;
2764  enum rsc_role_e saved_role = pcmk_role_unknown;
2765 
2766  if (rsc_id == NULL) {
2767  pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2768  " entry: No " PCMK_XA_ID);
2769  crm_log_xml_info(lrm_resource, "missing-id");
2770  return NULL;
2771  }
2772  crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2773  rsc_id, pcmk__node_name(node));
2774 
2775  /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2776  * them
2777  */
2778  for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2779  NULL);
2780  rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
2781 
2782  op_list = g_list_prepend(op_list, rsc_op);
2783  }
2784 
2785  if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2786  if (op_list == NULL) {
2787  // If there are no operations, there is nothing to do
2788  return NULL;
2789  }
2790  }
2791 
2792  /* find the resource */
2793  rsc = unpack_find_resource(scheduler, node, rsc_id);
2794  if (rsc == NULL) {
2795  if (op_list == NULL) {
2796  // If there are no operations, there is nothing to do
2797  return NULL;
2798  } else {
2799  rsc = process_orphan_resource(lrm_resource, node, scheduler);
2800  }
2801  }
2802  pcmk__assert(rsc != NULL);
2803 
2804  // Check whether the resource is "shutdown-locked" to this node
2805  if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2806  unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2807  }
2808 
2809  /* process operations */
2810  saved_role = rsc->role;
2811  rsc->role = pcmk_role_unknown;
2812  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2813 
2814  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2815  xmlNode *rsc_op = (xmlNode *) gIter->data;
2816 
2817  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2818  }
2819 
2820  /* create active recurring operations as optional */
2821  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2822  process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2823  scheduler);
2824 
2825  /* no need to free the contents */
2826  g_list_free(sorted_op_list);
2827 
2828  process_rsc_state(rsc, node, on_fail);
2829 
2830  if (get_target_role(rsc, &req_role)) {
2831  if ((rsc->next_role == pcmk_role_unknown)
2832  || (req_role < rsc->next_role)) {
2833 
2834  pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2835 
2836  } else if (req_role > rsc->next_role) {
2837  pcmk__rsc_info(rsc,
2838  "%s: Not overwriting calculated next role %s"
2839  " with requested next role %s",
2840  rsc->id, pcmk_role_text(rsc->next_role),
2841  pcmk_role_text(req_role));
2842  }
2843  }
2844 
2845  if (saved_role > rsc->role) {
2846  rsc->role = saved_role;
2847  }
2848 
2849  return rsc;
2850 }
2851 
2852 static void
2853 handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2854  pcmk_scheduler_t *scheduler)
2855 {
2856  for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
2857  NULL, NULL);
2858  rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2859 
2860  pcmk_resource_t *rsc;
2861  pcmk_resource_t *container;
2862  const char *rsc_id;
2863  const char *container_id;
2864 
2865  if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2866  continue;
2867  }
2868 
2869  container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2870  rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2871  if (container_id == NULL || rsc_id == NULL) {
2872  continue;
2873  }
2874 
2875  container = pe_find_resource(scheduler->resources, container_id);
2876  if (container == NULL) {
2877  continue;
2878  }
2879 
2880  rsc = pe_find_resource(scheduler->resources, rsc_id);
2881  if ((rsc == NULL) || (rsc->container != NULL)
2882  || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2883  continue;
2884  }
2885 
2886  pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2887  rsc->id, container_id);
2888  rsc->container = container;
2889  container->fillers = g_list_append(container->fillers, rsc);
2890  }
2891 }
2892 
2901 static void
2902 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2903  pcmk_scheduler_t *scheduler)
2904 {
2905  bool found_orphaned_container_filler = false;
2906 
2907  // Drill down to PCMK__XE_LRM_RESOURCES section
2908  xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2909  if (xml == NULL) {
2910  return;
2911  }
2912  xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2913  if (xml == NULL) {
2914  return;
2915  }
2916 
2917  // Unpack each PCMK__XE_LRM_RESOURCE entry
2918  for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2919  PCMK__XE_LRM_RESOURCE,
2920  NULL, NULL);
2921  rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
2922 
2923  pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2924 
2925  if ((rsc != NULL)
2926  && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2927  found_orphaned_container_filler = true;
2928  }
2929  }
2930 
2931  /* Now that all resource state has been unpacked for this node, map any
2932  * orphaned container fillers to their container resource.
2933  */
2934  if (found_orphaned_container_filler) {
2935  handle_orphaned_container_fillers(xml, scheduler);
2936  }
2937 }
2938 
2939 static void
2940 set_active(pcmk_resource_t *rsc)
2941 {
2942  const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2943 
2944  if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2945  rsc->role = pcmk_role_unpromoted;
2946  } else {
2947  rsc->role = pcmk_role_started;
2948  }
2949 }
2950 
2951 static void
2952 set_node_score(gpointer key, gpointer value, gpointer user_data)
2953 {
2954  pcmk_node_t *node = value;
2955  int *score = user_data;
2956 
2957  node->weight = *score;
2958 }
2959 
2960 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2961  "/" PCMK__XE_NODE_STATE
2962 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
2963  "/" PCMK__XE_LRM_RESOURCES \
2964  "/" PCMK__XE_LRM_RESOURCE
2965 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2966 
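/* Editorial illustration (not part of unpack.c), assuming the usual string
 * values of the name constants above and hypothetical names "node1" and
 * "myrsc": the XPath assembled from these fragments in find_lrm_op() below
 * expands to roughly
 *
 *   /cib/status/node_state[@uname='node1']
 *     /lrm/lrm_resources/lrm_resource[@id='myrsc']
 *     /lrm_rsc_op[@operation='monitor']
 */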
2967 static xmlNode *
2968 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2969  int target_rc, pcmk_scheduler_t *scheduler)
2970 {
2971  GString *xpath = NULL;
2972  xmlNode *xml = NULL;
2973 
2974  CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2975  return NULL);
2976 
2977  xpath = g_string_sized_new(256);
2978  pcmk__g_strcat(xpath,
2979  XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2980  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2981  SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2982  NULL);
2983 
2984  /* Need to check against transition_magic too? */
2985  if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2986  pcmk__g_strcat(xpath,
2987  " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2988  NULL);
2989 
2990  } else if ((source != NULL)
2991  && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2992  pcmk__g_strcat(xpath,
2993  " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2994  NULL);
2995  } else {
2996  g_string_append_c(xpath, ']');
2997  }
2998 
2999  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
3000  LOG_DEBUG);
3001  g_string_free(xpath, TRUE);
3002 
3003  if (xml && target_rc >= 0) {
3004  int rc = PCMK_OCF_UNKNOWN_ERROR;
3005  int status = PCMK_EXEC_ERROR;
3006 
3007  crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
3008  crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
3009  if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
3010  return NULL;
3011  }
3012  }
3013  return xml;
3014 }
3015 
3016 static xmlNode *
3017 find_lrm_resource(const char *rsc_id, const char *node_name,
3018  pcmk_scheduler_t *scheduler)
3019 {
3020  GString *xpath = NULL;
3021  xmlNode *xml = NULL;
3022 
3023  CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
3024 
3025  xpath = g_string_sized_new(256);
3026  pcmk__g_strcat(xpath,
3027  XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
3028  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
3029  NULL);
3030 
3031  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
3032  LOG_DEBUG);
3033 
3034  g_string_free(xpath, TRUE);
3035  return xml;
3036 }
3037 
3047 static bool
3048 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3049 {
3050  bool result = false;
3051  xmlXPathObjectPtr search;
3052  char *xpath = NULL;
3053 
3054  xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3055  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
3056  SUB_XPATH_LRM_RSC_OP
3057  "[@" PCMK__XA_RC_CODE "!='%d']",
3058  node_name, rsc->id, PCMK_OCF_UNKNOWN);
3059 
3060  search = xpath_search(rsc->cluster->input, xpath);
3061  result = (numXpathResults(search) == 0);
3062  freeXpathObject(search);
3063  free(xpath);
3064  return result;
3065 }
3066 
3079 static bool
3080 monitor_not_running_after(const char *rsc_id, const char *node_name,
3081  const xmlNode *xml_op, bool same_node,
3082  pcmk_scheduler_t *scheduler)
3083 {
3084  /* Any probe/monitor operation on the node indicating it was not running
3085  * there
3086  */
3087  xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
3088  NULL, PCMK_OCF_NOT_RUNNING, scheduler);
3089 
3090  return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
3091 }
3092 
3105 static bool
3106 non_monitor_after(const char *rsc_id, const char *node_name,
3107  const xmlNode *xml_op, bool same_node,
3108  pcmk_scheduler_t *scheduler)
3109 {
3110  xmlNode *lrm_resource = NULL;
3111 
3112  lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3113  if (lrm_resource == NULL) {
3114  return false;
3115  }
3116 
3117  for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3118  NULL, NULL);
3119  op != NULL; op = pcmk__xe_next_same(op)) {
3120 
3121  const char * task = NULL;
3122 
3123  if (op == xml_op) {
3124  continue;
3125  }
3126 
3127  task = crm_element_value(op, PCMK_XA_OPERATION);
3128 
3129  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
3130  PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
3131  NULL)
3132  && pe__is_newer_op(op, xml_op, same_node) > 0) {
3133  return true;
3134  }
3135  }
3136 
3137  return false;
3138 }
3139 
3152 static bool
3153 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3154  const xmlNode *migrate_to,
3155  const xmlNode *migrate_from,
3157 {
3158  const xmlNode *xml_op = migrate_to;
3159  const char *source = NULL;
3160  const char *target = NULL;
3161  bool same_node = false;
3162 
3163  if (migrate_from) {
3164  xml_op = migrate_from;
3165  }
3166 
3167  source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
3168  target = crm_element_value(xml_op, PCMK__META_MIGRATE_TARGET);
3169 
3170  /* It's preferable to compare to the migration event on the same node,
3171  * if one exists, since call IDs are more reliable.
3172  */
3173  if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3174  if (migrate_from) {
3175  xml_op = migrate_from;
3176  same_node = true;
3177 
3178  } else {
3179  xml_op = migrate_to;
3180  }
3181 
3182  } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3183  if (migrate_to) {
3184  xml_op = migrate_to;
3185  same_node = true;
3186 
3187  } else {
3188  xml_op = migrate_from;
3189  }
3190  }
3191 
3192  /* If there's any newer non-monitor operation on the node, or any newer
3193  * probe/monitor operation on the node indicating it was not running there,
3194  * the migration events potentially no longer matter for the node.
3195  */
3196  return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3197  || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3198  scheduler);
3199 }
3200 
3213 static int
3214 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3215  const pcmk_node_t *target_node,
3216  const char **source_name, const char **target_name)
3217 {
3218  *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3219  *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3220  if ((*source_name == NULL) || (*target_name == NULL)) {
3221  pcmk__config_err("Ignoring resource history entry %s without "
3222  PCMK__META_MIGRATE_SOURCE " and "
3223  PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3224  return pcmk_rc_unpack_error;
3225  }
3226 
3227  if ((source_node != NULL)
3228  && !pcmk__str_eq(*source_name, source_node->details->uname,
3229  pcmk__str_casei|pcmk__str_null_matches)) {
3230  pcmk__config_err("Ignoring resource history entry %s because "
3231  PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3232  pcmk__xe_id(entry), *source_name,
3233  pcmk__node_name(source_node));
3234  return pcmk_rc_unpack_error;
3235  }
3236 
3237  if ((target_node != NULL)
3238  && !pcmk__str_eq(*target_name, target_node->details->uname,
3239  pcmk__str_casei|pcmk__str_null_matches)) {
3240  pcmk__config_err("Ignoring resource history entry %s because "
3241  PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3242  pcmk__xe_id(entry), *target_name,
3243  pcmk__node_name(target_node));
3244  return pcmk_rc_unpack_error;
3245  }
3246 
3247  return pcmk_rc_ok;
3248 }
3249 
3250 /*
3251  * \internal
3252  * \brief Add a migration source to a resource's list of dangling migrations
3253  *
3254  * If the migrate_to and migrate_from actions in a live migration both
3255  * succeeded, but there is no stop on the source, the migration is considered
3256  * "dangling." Add the source to the resource's dangling migration list, which
3257  * will be used to schedule a stop on the source without affecting the target.
3258  *
3259  * \param[in,out] rsc Resource involved in migration
3260  * \param[in] node Migration source
3261  */
3262 static void
3263 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3264 {
3265  pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3266  rsc->id, pcmk__node_name(node));
3267  rsc->role = pcmk_role_stopped;
3268  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3269  (gpointer) node);
3270 }
3271 
3278 static void
3279 unpack_migrate_to_success(struct action_history *history)
3280 {
3281  /* A complete migration sequence is:
3282  * 1. migrate_to on source node (which succeeded if we get to this function)
3283  * 2. migrate_from on target node
3284  * 3. stop on source node
3285  *
3286  * If no migrate_from has happened, the migration is considered to be
3287  * "partial". If the migrate_from succeeded but no stop has happened, the
3288  * migration is considered to be "dangling".
3289  *
3290  * If a successful migrate_to and stop have happened on the source node, we
3291  * still need to check for a partial migration, due to scenarios (easier to
3292  * produce with batch-limit=1) like:
3293  *
3294  * - A resource is migrating from node1 to node2, and a migrate_to is
3295  * initiated for it on node1.
3296  *
3297  * - node2 goes into standby mode while the migrate_to is pending, which
3298  * aborts the transition.
3299  *
3300  * - Upon completion of the migrate_to, a new transition schedules a stop
3301  * on both nodes and a start on node1.
3302  *
3303  * - If the new transition is aborted for any reason while the resource is
3304  * stopping on node1, the transition after that stop completes will see
3305  * the migrate_to and stop on the source, but it's still a partial
3306  * migration, and the resource must be stopped on node2 because it is
3307  * potentially active there due to the migrate_to.
3308  *
3309  * We also need to take into account that either node's history may be
3310  * cleared at any point in the migration process.
3311  */
3312  int from_rc = PCMK_OCF_OK;
3313  int from_status = PCMK_EXEC_PENDING;
3314  pcmk_node_t *target_node = NULL;
3315  xmlNode *migrate_from = NULL;
3316  const char *source = NULL;
3317  const char *target = NULL;
3318  bool source_newer_op = false;
3319  bool target_newer_state = false;
3320  bool active_on_target = false;
3321 
3322  // Get source and target node names from XML
3323  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3324  &target) != pcmk_rc_ok) {
3325  return;
3326  }
3327 
3328  // Check for newer state on the source
3329  source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3330  true, history->rsc->cluster);
3331 
3332  // Check for a migrate_from action from this source on the target
3333  migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3334  target, source, -1, history->rsc->cluster);
3335  if (migrate_from != NULL) {
3336  if (source_newer_op) {
3337  /* There's a newer non-monitor operation on the source and a
3338  * migrate_from on the target, so this migrate_to is irrelevant to
3339  * the resource's state.
3340  */
3341  return;
3342  }
3343  crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3344  crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3345  }
3346 
3347  /* If the resource has newer state on both the source and target after the
3348  * migration events, this migrate_to is irrelevant to the resource's state.
3349  */
3350  target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3351  history->xml, migrate_from,
3352  history->rsc->cluster);
3353  if (source_newer_op && target_newer_state) {
3354  return;
3355  }
3356 
3357  /* Check for dangling migration (migrate_from succeeded but stop not done).
3358  * We know there's no stop because we already returned if the target has a
3359  * migrate_from and the source has any newer non-monitor operation.
3360  */
3361  if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3362  add_dangling_migration(history->rsc, history->node);
3363  return;
3364  }
3365 
3366  /* Without newer state, this migrate_to implies the resource is active.
3367  * (Clones are not allowed to migrate, so role can't be promoted.)
3368  */
3369  history->rsc->role = pcmk_role_started;
3370 
3371  target_node = pcmk_find_node(history->rsc->cluster, target);
3372  active_on_target = !target_newer_state && (target_node != NULL)
3373  && target_node->details->online;
3374 
3375  if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3376  if (active_on_target) {
3377  native_add_running(history->rsc, target_node, history->rsc->cluster,
3378  TRUE);
3379  } else {
3380  // Mark resource as failed, require recovery, and prevent migration
3381  pcmk__set_rsc_flags(history->rsc,
3384  }
3385  return;
3386  }
3387 
3388  // The migrate_from is pending, complete but erased, or to be scheduled
3389 
3390  /* If there is no history at all for the resource on an online target, then
3391  * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3392  * have the probe result, it will be reflected in target_newer_state.
3393  */
3394  if ((target_node != NULL) && target_node->details->online
3395  && unknown_on_node(history->rsc, target)) {
3396  return;
3397  }
3398 
3399  if (active_on_target) {
3400  pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3401  source);
3402 
3403  native_add_running(history->rsc, target_node, history->rsc->cluster,
3404  FALSE);
3405  if ((source_node != NULL) && source_node->details->online) {
3406  /* This is a partial migration: the migrate_to completed
3407  * successfully on the source, but the migrate_from has not
3408  * completed. Remember the source and target; if the newly
3409  * chosen target remains the same when we schedule actions
3410  * later, we may continue with the migration.
3411  */
3412  history->rsc->partial_migration_target = target_node;
3413  history->rsc->partial_migration_source = source_node;
3414  }
3415 
3416  } else if (!source_newer_op) {
3417  // Mark resource as failed, require recovery, and prevent migration
3418  pcmk__set_rsc_flags(history->rsc,
3421  }
3422 }
3423 
3430 static void
3431 unpack_migrate_to_failure(struct action_history *history)
3432 {
3433  xmlNode *target_migrate_from = NULL;
3434  const char *source = NULL;
3435  const char *target = NULL;
3436 
3437  // Get source and target node names from XML
3438  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3439  &target) != pcmk_rc_ok) {
3440  return;
3441  }
3442 
3443  /* If a migration failed, we have to assume the resource is active. Clones
3444  * are not allowed to migrate, so role can't be promoted.
3445  */
3446  history->rsc->role = pcmk_role_started;
3447 
3448  // Check for migrate_from on the target
3449  target_migrate_from = find_lrm_op(history->rsc->id,
3450  PCMK_ACTION_MIGRATE_FROM, target, source,
3451  PCMK_OCF_OK, history->rsc->cluster);
3452 
3453  if (/* If the resource state is unknown on the target, it will likely be
3454  * probed there.
3455  * Don't just consider it running there. We will get back here anyway in
3456  * case the probe detects it's running there.
3457  */
3458  !unknown_on_node(history->rsc, target)
3459  /* If the resource has newer state on the target after the migration
3460  * events, this migrate_to no longer matters for the target.
3461  */
3462  && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3463  target_migrate_from,
3464  history->rsc->cluster)) {
3465  /* The resource has no newer state on the target, so assume it's still
3466  * active there (if it is up).
3468  */
3469  pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
3470  target);
3471 
3472  if (target_node && target_node->details->online) {
3473  native_add_running(history->rsc, target_node, history->rsc->cluster,
3474  FALSE);
3475  }
3476 
3477  } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3478  history->rsc->cluster)) {
3479  /* We know the resource has newer state on the target, but this
3480  * migrate_to still matters for the source as long as there's no newer
3481  * non-monitor operation there.
3482  */
3483 
3484  // Mark node as having dangling migration so we can force a stop later
3485  history->rsc->dangling_migrations =
3486  g_list_prepend(history->rsc->dangling_migrations,
3487  (gpointer) history->node);
3488  }
3489 }
3490 
3497 static void
3498 unpack_migrate_from_failure(struct action_history *history)
3499 {
3500  xmlNode *source_migrate_to = NULL;
3501  const char *source = NULL;
3502  const char *target = NULL;
3503 
3504  // Get source and target node names from XML
3505  if (get_migration_node_names(history->xml, NULL, history->node, &source,
3506  &target) != pcmk_rc_ok) {
3507  return;
3508  }
3509 
3510  /* If a migration failed, we have to assume the resource is active. Clones
3511  * are not allowed to migrate, so role can't be promoted.
3512  */
3513  history->rsc->role = pcmk_role_started;
3514 
3515  // Check for a migrate_to on the source
3516  source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3517  source, target, PCMK_OCF_OK,
3518  history->rsc->cluster);
3519 
3520  if (/* If the resource state is unknown on the source, it will likely be
3521  * probed there.
3522  * Don't just consider it running there. We will get back here anyway in
3523  * case the probe detects it's running there.
3524  */
3525  !unknown_on_node(history->rsc, source)
3526  /* If the resource has newer state on the source after the migration
3527  * events, this migrate_from no longer matters for the source.
3528  */
3529  && !newer_state_after_migrate(history->rsc->id, source,
3530  source_migrate_to, history->xml,
3531  history->rsc->cluster)) {
3532  /* The resource has no newer state on the source, so assume it's still
3533  * active there (if it is up).
3534  */
3535  pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
3536  source);
3537 
3538  if (source_node && source_node->details->online) {
3539  native_add_running(history->rsc, source_node, history->rsc->cluster,
3540  TRUE);
3541  }
3542  }
3543 }
3544 
3551 static void
3552 record_failed_op(struct action_history *history)
3553 {
3554  if (!(history->node->details->online)) {
3555  return;
3556  }
3557 
3558  for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3559  xIter != NULL; xIter = xIter->next) {
3560 
3561  const char *key = pcmk__xe_history_key(xIter);
3562  const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3563 
3564  if (pcmk__str_eq(history->key, key, pcmk__str_none)
3565  && pcmk__str_eq(uname, history->node->details->uname,
3566  pcmk__str_casei)) {
3567  crm_trace("Skipping duplicate entry %s on %s",
3568  history->key, pcmk__node_name(history->node));
3569  return;
3570  }
3571  }
3572 
3573  crm_trace("Adding entry for %s on %s to failed action list",
3574  history->key, pcmk__node_name(history->node));
3575  crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
3576  crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3577  pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
3578 }
3579 
3580 static char *
3581 last_change_str(const xmlNode *xml_op)
3582 {
3583  time_t when;
3584  char *result = NULL;
3585 
3586  if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
3587  &when) == pcmk_ok) {
3588  char *when_s = pcmk__epoch2str(&when, 0);
3589  const char *p = strchr(when_s, ' ');
3590 
3591  // Skip day of week to make message shorter
3592  if ((p != NULL) && (*(++p) != '\0')) {
3593  result = pcmk__str_copy(p);
3594  }
3595  free(when_s);
3596  }
3597 
3598  if (result == NULL) {
3599  result = pcmk__str_copy("unknown_time");
3600  }
3601 
3602  return result;
3603 }
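/* Editorial illustration (not part of unpack.c): if pcmk__epoch2str()
 * renders the timestamp as "Mon Jan  2 03:04:05 2006", skipping past the
 * first space above yields "Jan  2 03:04:05 2006", a shorter form for
 * log messages.
 */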
3604 
3617 static int
3618 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3619 {
3620  switch (first) {
3621  case pcmk_on_fail_demote:
3622  switch (second) {
3623  case pcmk_on_fail_ignore:
3624  return 1;
3625  case pcmk_on_fail_demote:
3626  return 0;
3627  default:
3628  return -1;
3629  }
3630  break;
3631 
3632  case pcmk_on_fail_reset_remote:
3633  switch (second) {
3634  case pcmk_on_fail_ignore:
3635  case pcmk_on_fail_demote:
3636  case pcmk_on_fail_restart:
3637  return 1;
3638  case pcmk_on_fail_reset_remote:
3639  return 0;
3640  default:
3641  return -1;
3642  }
3643  break;
3644 
3645  case pcmk_on_fail_restart_container:
3646  switch (second) {
3647  case pcmk_on_fail_ignore:
3648  case pcmk_on_fail_demote:
3649  case pcmk_on_fail_restart:
3650  case pcmk_on_fail_reset_remote:
3651  return 1;
3652  case pcmk_on_fail_restart_container:
3653  return 0;
3654  default:
3655  return -1;
3656  }
3657  break;
3658 
3659  default:
3660  break;
3661  }
3662  switch (second) {
3663  case pcmk_on_fail_demote:
3664  return (first == pcmk_on_fail_ignore)? -1 : 1;
3665 
3666  case pcmk_on_fail_reset_remote:
3667  switch (first) {
3668  case pcmk_on_fail_ignore:
3669  case pcmk_on_fail_demote:
3670  case pcmk_on_fail_restart:
3671  return -1;
3672  default:
3673  return 1;
3674  }
3675  break;
3676 
3677  case pcmk_on_fail_restart_container:
3678  switch (first) {
3679  case pcmk_on_fail_ignore:
3680  case pcmk_on_fail_demote:
3681  case pcmk_on_fail_restart:
3682  case pcmk_on_fail_reset_remote:
3683  return -1;
3684  default:
3685  return 1;
3686  }
3687  break;
3688 
3689  default:
3690  break;
3691  }
3692  return first - second;
3693 }
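/* Editorial illustration (not part of unpack.c): cmp_on_fail() orders
 * failure policies by severity. For example,
 * cmp_on_fail(pcmk_on_fail_ignore, pcmk_on_fail_fence_node) is negative,
 * so a caller keeping the larger of two policies would settle on fencing.
 */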
3694 
3701 static void
3702 ban_from_all_nodes(pcmk_resource_t *rsc)
3703 {
3704  int score = -PCMK_SCORE_INFINITY;
3705  pcmk_resource_t *fail_rsc = rsc;
3706 
3707  if (fail_rsc->parent != NULL) {
3708  pcmk_resource_t *parent = uber_parent(fail_rsc);
3709 
3710  if (pcmk__is_anonymous_clone(parent)) {
3711  /* For anonymous clones, if an operation with
3712  * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3713  * entire clone must stop.
3714  */
3715  fail_rsc = parent;
3716  }
3717  }
3718 
3719  // Ban the resource from all nodes
3720  crm_notice("%s will not be started under current conditions", fail_rsc->id);
3721  if (fail_rsc->allowed_nodes != NULL) {
3722  g_hash_table_destroy(fail_rsc->allowed_nodes);
3723  }
3724  fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3725  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3726 }
3727 
3736 static void
3737 unpack_failure_handling(struct action_history *history,
3738  enum action_fail_response *on_fail,
3739  enum rsc_role_e *fail_role)
3740 {
3741  xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3742  history->interval_ms, true);
3743 
3744  GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3745  history->task,
3746  history->interval_ms, config);
3747 
3748  const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3749 
3750  *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3751  history->interval_ms, on_fail_str);
3752  *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3753  meta);
3754  g_hash_table_destroy(meta);
3755 }
3756 
3767 static void
3768 unpack_rsc_op_failure(struct action_history *history,
3769  enum action_fail_response config_on_fail,
3770  enum rsc_role_e fail_role, xmlNode **last_failure,
3771  enum action_fail_response *on_fail)
3772 {
3773  bool is_probe = false;
3774  char *last_change_s = NULL;
3775 
3776  *last_failure = history->xml;
3777 
3778  is_probe = pcmk_xe_is_probe(history->xml);
3779  last_change_s = last_change_str(history->xml);
3780 
3781  if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3782  && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3783  crm_trace("Unexpected result (%s%s%s) was recorded for "
3784  "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3785  services_ocf_exitcode_str(history->exit_status),
3786  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3787  pcmk__s(history->exit_reason, ""),
3788  (is_probe? "probe" : history->task), history->rsc->id,
3789  pcmk__node_name(history->node), last_change_s,
3790  history->exit_status, history->id);
3791  } else {
3792  pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
3793  "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
3794  services_ocf_exitcode_str(history->exit_status),
3795  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3796  pcmk__s(history->exit_reason, ""),
3797  (is_probe? "probe" : history->task), history->rsc->id,
3798  pcmk__node_name(history->node), last_change_s,
3799  history->exit_status, history->id);
3800 
3801  if (is_probe && (history->exit_status != PCMK_OCF_OK)
3802  && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3803  && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3804 
3805  /* A failed (not just unexpected) probe result could mean the user
3806  * didn't know resources will be probed even where they can't run.
3807  */
3808  crm_notice("If it is not possible for %s to run on %s, see "
3809  "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3810  "constraints",
3811  history->rsc->id, pcmk__node_name(history->node));
3812  }
3813 
3814  record_failed_op(history);
3815  }
3816 
3817  free(last_change_s);
3818 
3819  if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3820  pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3821  pcmk_on_fail_text(*on_fail),
3822  pcmk_on_fail_text(config_on_fail), history->key);
3823  *on_fail = config_on_fail;
3824  }
3825 
3826  if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3827  resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3828  "__stop_fail__", history->rsc->cluster);
3829 
3830  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3831  unpack_migrate_to_failure(history);
3832 
3833  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3834  unpack_migrate_from_failure(history);
3835 
3836  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3837  history->rsc->role = pcmk_role_promoted;
3838 
3839  } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3840  if (config_on_fail == pcmk_on_fail_block) {
3841  history->rsc->role = pcmk_role_promoted;
3842  pe__set_next_role(history->rsc, pcmk_role_stopped,
3843  "demote with " PCMK_META_ON_FAIL "=block");
3844 
3845  } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3846  history->rsc->role = pcmk_role_stopped;
3847 
3848  } else {
3849  /* Staying in the promoted role would put the scheduler and
3850  * controller into a loop. Setting the role to unpromoted is not
3851  * dangerous because the resource will be stopped as part of
3852  * recovery, and any promotion will be ordered after that stop.
3853  */
3854  history->rsc->role = pcmk_role_unpromoted;
3855  }
3856  }
3857 
3858  if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3859  /* leave stopped */
3860  pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3861  history->rsc->role = pcmk_role_stopped;
3862 
3863  } else if (history->rsc->role < pcmk_role_started) {
3864  pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3865  set_active(history->rsc);
3866  }
3867 
3868  pcmk__rsc_trace(history->rsc,
3869  "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3870  history->rsc->id, pcmk_role_text(history->rsc->role),
3871  pcmk__btoa(history->node->details->unclean),
3872  pcmk_on_fail_text(config_on_fail),
3873  pcmk_role_text(fail_role));
3874 
3875  if ((fail_role != pcmk_role_started)
3876  && (history->rsc->next_role < fail_role)) {
3877  pe__set_next_role(history->rsc, fail_role, "failure");
3878  }
3879 
3880  if (fail_role == pcmk_role_stopped) {
3881  ban_from_all_nodes(history->rsc);
3882  }
3883 }
3884 
3894 static void
3895 block_if_unrecoverable(struct action_history *history)
3896 {
3897  char *last_change_s = NULL;
3898 
3899  if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3900  return; // All actions besides stop are always recoverable
3901  }
3902  if (pe_can_fence(history->node->details->data_set, history->node)) {
3903  return; // Failed stops are recoverable via fencing
3904  }
3905 
3906  last_change_s = last_change_str(history->xml);
3907  pcmk__sched_err("No further recovery can be attempted for %s "
3908  "because %s on %s failed (%s%s%s) at %s "
3909  CRM_XS " rc=%d id=%s",
3910  history->rsc->id, history->task,
3911  pcmk__node_name(history->node),
3912  services_ocf_exitcode_str(history->exit_status),
3913  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3914  pcmk__s(history->exit_reason, ""),
3915  last_change_s, history->exit_status, history->id);
3916 
3917  free(last_change_s);
3918 
3919  pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_managed);
3920  pcmk__set_rsc_flags(history->rsc, pcmk_rsc_blocked);
3921 }
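
The last two calls in block_if_unrecoverable() amount to flipping two bits in rsc->flags: the resource is no longer managed and is marked blocked. A self-contained sketch of that set/clear flag pattern; the flag names and bit positions here are illustrative only:

#include <stdint.h>
#include <stdio.h>

#define RSC_MANAGED (1ULL << 0) // illustrative, not the real pcmk_rsc_* bits
#define RSC_BLOCKED (1ULL << 1) // illustrative, not the real pcmk_rsc_* bits

int
main(void)
{
    uint64_t flags = RSC_MANAGED;

    flags &= ~RSC_MANAGED; // stop managing the resource
    flags |= RSC_BLOCKED;  // forbid any further recovery actions on it

    printf("managed=%d blocked=%d\n",
           (flags & RSC_MANAGED) != 0, (flags & RSC_BLOCKED) != 0);
    return 0;
}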
3922 
3932 static inline void
3933 remap_because(struct action_history *history, const char **why, int value,
3934  const char *reason)
3935 {
3936  if (history->execution_status != value) {
3937  history->execution_status = value;
3938  *why = reason;
3939  }
3940 }
3941 
3964 static void
3965 remap_operation(struct action_history *history,
3966  enum action_fail_response *on_fail, bool expired)
3967 {
3968  bool is_probe = false;
3969  int orig_exit_status = history->exit_status;
3970  int orig_exec_status = history->execution_status;
3971  const char *why = NULL;
3972  const char *task = history->task;
3973 
3974  // Remap degraded results to their successful counterparts
3975  history->exit_status = pcmk__effective_rc(history->exit_status);
3976  if (history->exit_status != orig_exit_status) {
3977  why = "degraded result";
3978  if (!expired && (!history->node->details->shutdown
3979  || history->node->details->online)) {
3980  record_failed_op(history);
3981  }
3982  }
3983 
3984  if (!pcmk__is_bundled(history->rsc)
3985  && pcmk_xe_mask_probe_failure(history->xml)
3986  && ((history->execution_status != PCMK_EXEC_DONE)
3987  || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3988  history->execution_status = PCMK_EXEC_DONE;
3989  history->exit_status = PCMK_OCF_NOT_RUNNING;
3990  why = "equivalent probe result";
3991  }
3992 
3993  /* If the executor reported an execution status of anything but done or
3994  * error, consider that final. But for done or error, we know better whether
3995  * it should be treated as a failure or not, because we know the expected
3996  * result.
3997  */
3998  switch (history->execution_status) {
3999  case PCMK_EXEC_DONE:
4000  case PCMK_EXEC_ERROR:
4001  break;
4002 
4003  // These should be treated as node-fatal
 4004  case PCMK_EXEC_NO_FENCE_DEVICE:
 4005  case PCMK_EXEC_NO_SECRETS:
4006  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4007  "node-fatal error");
4008  goto remap_done;
4009 
4010  default:
4011  goto remap_done;
4012  }
4013 
4014  is_probe = pcmk_xe_is_probe(history->xml);
4015  if (is_probe) {
4016  task = "probe";
4017  }
4018 
4019  if (history->expected_exit_status < 0) {
4020  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
4021  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
4022  * expected exit status in the transition key, which (along with the
4023  * similar case of a corrupted transition key in the CIB) will be
4024  * reported to this function as -1. Pacemaker 2.0+ does not support
4025  * rolling upgrades from those versions or processing of saved CIB files
4026  * from those versions, so we do not need to care much about this case.
4027  */
4028  remap_because(history, &why, PCMK_EXEC_ERROR,
4029  "obsolete history format");
4030  pcmk__config_warn("Expected result not found for %s on %s "
4031  "(corrupt or obsolete CIB?)",
4032  history->key, pcmk__node_name(history->node));
4033 
4034  } else if (history->exit_status == history->expected_exit_status) {
4035  remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
4036 
4037  } else {
4038  remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
4039  pcmk__rsc_debug(history->rsc,
4040  "%s on %s: expected %d (%s), got %d (%s%s%s)",
4041  history->key, pcmk__node_name(history->node),
4042  history->expected_exit_status,
4043  services_ocf_exitcode_str(history->expected_exit_status),
4044  history->exit_status,
4045  services_ocf_exitcode_str(history->exit_status),
4046  (pcmk__str_empty(history->exit_reason)? "" : ": "),
4047  pcmk__s(history->exit_reason, ""));
4048  }
4049 
4050  switch (history->exit_status) {
4051  case PCMK_OCF_OK:
4052  if (is_probe
4053  && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
4054  char *last_change_s = last_change_str(history->xml);
4055 
4056  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4057  pcmk__rsc_info(history->rsc,
4058  "Probe found %s active on %s at %s",
4059  history->rsc->id, pcmk__node_name(history->node),
4060  last_change_s);
4061  free(last_change_s);
4062  }
4063  break;
4064 
4065  case PCMK_OCF_NOT_RUNNING:
4066  if (is_probe
4067  || (history->expected_exit_status == history->exit_status)
4068  || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
4069 
4070  /* For probes, recurring monitors for the Stopped role, and
4071  * unmanaged resources, "not running" is not considered a
4072  * failure.
4073  */
4074  remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
4075  history->rsc->role = pcmk_role_stopped;
4076  *on_fail = pcmk_on_fail_ignore;
4077  pe__set_next_role(history->rsc, pcmk_role_unknown,
4078  "not running");
4079  }
4080  break;
4081 
 4082  case PCMK_OCF_RUNNING_PROMOTED:
 4083  if (is_probe
4084  && (history->exit_status != history->expected_exit_status)) {
4085  char *last_change_s = last_change_str(history->xml);
4086 
4087  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
4088  pcmk__rsc_info(history->rsc,
4089  "Probe found %s active and promoted on %s at %s",
4090  history->rsc->id,
4091  pcmk__node_name(history->node), last_change_s);
4092  free(last_change_s);
4093  }
4094  if (!expired
4095  || (history->exit_status == history->expected_exit_status)) {
4096  history->rsc->role = pcmk_role_promoted;
4097  }
4098  break;
4099 
 4100  case PCMK_OCF_FAILED_PROMOTED:
 4101  if (!expired) {
4102  history->rsc->role = pcmk_role_promoted;
4103  }
4104  remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
4105  break;
4106 
 4107  case PCMK_OCF_NOT_CONFIGURED:
 4108  remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
4109  break;
4110 
 4111  case PCMK_OCF_UNIMPLEMENT_FEATURE:
 4112  {
4113  guint interval_ms = 0;
 4114  crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
 4115  &interval_ms);
4116 
4117  if (interval_ms == 0) {
4118  if (!expired) {
4119  block_if_unrecoverable(history);
4120  }
4121  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
4122  "exit status");
4123  } else {
4124  remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
4125  "exit status");
4126  }
4127  }
4128  break;
4129 
 4130  case PCMK_OCF_NOT_INSTALLED:
 4131  case PCMK_OCF_INVALID_PARAM:
 4132  case PCMK_OCF_INSUFFICIENT_PRIV:
 4133  if (!expired) {
4134  block_if_unrecoverable(history);
4135  }
4136  remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
4137  break;
4138 
4139  default:
4140  if (history->execution_status == PCMK_EXEC_DONE) {
4141  char *last_change_s = last_change_str(history->xml);
4142 
4143  crm_info("Treating unknown exit status %d from %s of %s "
4144  "on %s at %s as failure",
4145  history->exit_status, task, history->rsc->id,
4146  pcmk__node_name(history->node), last_change_s);
4147  remap_because(history, &why, PCMK_EXEC_ERROR,
4148  "unknown exit status");
4149  free(last_change_s);
4150  }
4151  break;
4152  }
4153 
4154 remap_done:
4155  if (why != NULL) {
4156  pcmk__rsc_trace(history->rsc,
4157  "Remapped %s result from [%s: %s] to [%s: %s] "
4158  "because of %s",
4159  history->key, pcmk_exec_status_str(orig_exec_status),
4160  crm_exit_str(orig_exit_status),
4161  pcmk_exec_status_str(history->execution_status),
4162  crm_exit_str(history->exit_status), why);
4163  }
4164 }
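
The heart of remap_operation() is comparing the recorded exit status against the expected one: a match becomes PCMK_EXEC_DONE even if the agent returned an error code, and a mismatch becomes PCMK_EXEC_ERROR even if the agent returned success. A reduced sketch of that rule, with statuses modeled as plain ints rather than the real enums:

#include <stdio.h>

enum exec { EXEC_DONE, EXEC_ERROR };

/* Sketch: an action "succeeded" when it produced the result the scheduler
 * asked for, regardless of whether that result is OCF success.
 */
static enum exec
remap(int expected_rc, int actual_rc)
{
    return (actual_rc == expected_rc) ? EXEC_DONE : EXEC_ERROR;
}

int
main(void)
{
    // A probe expecting "not running" (7) that got "not running": done
    printf("probe: %s\n", (remap(7, 7) == EXEC_DONE)? "done" : "error");
    // A stop expecting success (0) that returned an error (1): error
    printf("stop:  %s\n", (remap(0, 1) == EXEC_DONE)? "done" : "error");
    return 0;
}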
4165 
4166 // return TRUE if start or monitor last failure but parameters changed
4167 static bool
4168 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4169  pcmk_resource_t *rsc, pcmk_node_t *node)
4170 {
 4171  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
 4172  if (pe__bundle_needs_remote_name(rsc)) {
4173  /* We haven't allocated resources yet, so we can't reliably
4174  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4175  * When that's needed, defer the check until later.
4176  */
4177  pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4178  rsc->cluster);
4179 
4180  } else {
4181  pcmk__op_digest_t *digest_data = NULL;
4182 
4183  digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4184  rsc->cluster);
4185  switch (digest_data->rc) {
4186  case pcmk__digest_unknown:
4187  crm_trace("Resource %s history entry %s on %s"
4188  " has no digest to compare",
4189  rsc->id, pcmk__xe_history_key(xml_op),
4190  node->details->id);
4191  break;
4192  case pcmk__digest_match:
4193  break;
4194  default:
4195  return TRUE;
4196  }
4197  }
4198  }
4199  return FALSE;
4200 }
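
The digest comparison above is how parameter drift is detected: a hash of the parameters in effect when the action ran is stored in the history entry and compared against one recomputed from the current configuration. A toy illustration of the idea; the checksum here is a stand-in, since the real code hashes canonicalized XML:

#include <stdio.h>

// Toy checksum standing in for the real XML parameter digest
static unsigned
digest(const char *params)
{
    unsigned h = 5381;

    for (const char *p = params; *p != '\0'; p++) {
        h = (h * 33) + (unsigned char) *p;
    }
    return h;
}

int
main(void)
{
    unsigned recorded = digest("ip=10.0.0.5 nic=eth0");  // when action ran
    unsigned current  = digest("ip=10.0.0.99 nic=eth0"); // configuration now

    if (recorded != current) {
        printf("parameters changed: clear the failure and recheck config\n");
    }
    return 0;
}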
4201 
4202 // Order action after fencing of remote node, given connection rsc
4203 static void
4204 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
 4205  pcmk_scheduler_t *scheduler)
 4206 {
4207  pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4208 
4209  if (remote_node) {
4210  pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4211  FALSE, scheduler);
4212 
 4213  order_actions(fence, action, pcmk__ar_ordered);
 4214  }
4215 }
4216 
4217 static bool
4218 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4219  guint interval_ms, bool is_last_failure)
4220 {
4221  /* Clearing failures of recurring monitors has special concerns. The
4222  * executor reports only changes in the monitor result, so if the
4223  * monitor is still active and still getting the same failure result,
4224  * that will go undetected after the failure is cleared.
4225  *
4226  * Also, the operation history will have the time when the recurring
4227  * monitor result changed to the given code, not the time when the
4228  * result last happened.
4229  *
4230  * @TODO We probably should clear such failures only when the failure
4231  * timeout has passed since the last occurrence of the failed result.
4232  * However we don't record that information. We could maybe approximate
4233  * that by clearing only if there is a more recent successful monitor or
4234  * stop result, but we don't even have that information at this point
4235  * since we are still unpacking the resource's operation history.
4236  *
4237  * This is especially important for remote connection resources with a
4238  * reconnect interval, so in that case, we skip clearing failures
4239  * if the remote node hasn't been fenced.
4240  */
4241  if (rsc->remote_reconnect_ms
 4242  && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
 4243  && (interval_ms != 0)
4244  && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4245 
4246  pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
4247 
4248  if (remote_node && !remote_node->details->remote_was_fenced) {
4249  if (is_last_failure) {
4250  crm_info("Waiting to clear monitor failure for remote node %s"
4251  " until fencing has occurred", rsc->id);
4252  }
4253  return TRUE;
4254  }
4255  }
4256  return FALSE;
4257 }
4258 
4277 static bool
4278 check_operation_expiry(struct action_history *history)
4279 {
4280  bool expired = false;
4281  bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4282  time_t last_run = 0;
4283  int unexpired_fail_count = 0;
4284  const char *clear_reason = NULL;
4285 
4286  if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4287  pcmk__rsc_trace(history->rsc,
4288  "Resource history entry %s on %s is not expired: "
4289  "Not Installed does not expire",
4290  history->id, pcmk__node_name(history->node));
4291  return false; // "Not installed" must always be cleared manually
4292  }
4293 
4294  if ((history->rsc->failure_timeout > 0)
 4295  && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
 4296  &last_run) == 0)) {
4297 
4298  /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4299  * timestamp
4300  */
4301 
4302  time_t now = get_effective_time(history->rsc->cluster);
4303  time_t last_failure = 0;
4304 
4305  // Is this particular operation history older than the failure timeout?
4306  if ((now >= (last_run + history->rsc->failure_timeout))
4307  && !should_ignore_failure_timeout(history->rsc, history->task,
4308  history->interval_ms,
4309  is_last_failure)) {
4310  expired = true;
4311  }
4312 
4313  // Does the resource as a whole have an unexpired fail count?
4314  unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4315  &last_failure,
 4316  pcmk__fc_effective,
 4317  history->xml);
4318 
4319  // Update scheduler recheck time according to *last* failure
4320  crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4321  " last-failure@%lld",
4322  history->id, (long long) last_run, (expired? "" : "not "),
4323  (long long) now, unexpired_fail_count,
4324  history->rsc->failure_timeout, (long long) last_failure);
4325  last_failure += history->rsc->failure_timeout + 1;
4326  if (unexpired_fail_count && (now < last_failure)) {
4327  pe__update_recheck_time(last_failure, history->rsc->cluster,
4328  "fail count expiration");
4329  }
4330  }
4331 
4332  if (expired) {
4333  if (pe_get_failcount(history->node, history->rsc, NULL,
4334  pcmk__fc_default, history->xml)) {
4335  // There is a fail count ignoring timeout
4336 
4337  if (unexpired_fail_count == 0) {
4338  // There is no fail count considering timeout
4339  clear_reason = "it expired";
4340 
4341  } else {
4342  /* This operation is old, but there is an unexpired fail count.
4343  * In a properly functioning cluster, this should only be
4344  * possible if this operation is not a failure (otherwise the
4345  * fail count should be expired too), so this is really just a
4346  * failsafe.
4347  */
4348  pcmk__rsc_trace(history->rsc,
4349  "Resource history entry %s on %s is not "
4350  "expired: Unexpired fail count",
4351  history->id, pcmk__node_name(history->node));
4352  expired = false;
4353  }
4354 
4355  } else if (is_last_failure
4356  && (history->rsc->remote_reconnect_ms != 0)) {
4357  /* Clear any expired last failure when reconnect interval is set,
4358  * even if there is no fail count.
4359  */
4360  clear_reason = "reconnect interval is set";
4361  }
4362  }
4363 
4364  if (!expired && is_last_failure
4365  && should_clear_for_param_change(history->xml, history->task,
4366  history->rsc, history->node)) {
4367  clear_reason = "resource parameters have changed";
4368  }
4369 
4370  if (clear_reason != NULL) {
4371  pcmk_action_t *clear_op = NULL;
4372 
4373  // Schedule clearing of the fail count
4374  clear_op = pe__clear_failcount(history->rsc, history->node,
4375  clear_reason, history->rsc->cluster);
4376 
4377  if (pcmk_is_set(history->rsc->cluster->flags,
 4378  pcmk_sched_fencing_enabled)
 4379  && (history->rsc->remote_reconnect_ms != 0)) {
4380  /* If we're clearing a remote connection due to a reconnect
4381  * interval, we want to wait until any scheduled fencing
4382  * completes.
4383  *
4384  * We could limit this to remote_node->details->unclean, but at
4385  * this point, that's always true (it won't be reliable until
4386  * after unpack_node_history() is done).
4387  */
4388  crm_info("Clearing %s failure will wait until any scheduled "
4389  "fencing of %s completes",
4390  history->task, history->rsc->id);
4391  order_after_remote_fencing(clear_op, history->rsc,
4392  history->rsc->cluster);
4393  }
4394  }
4395 
4396  if (expired && (history->interval_ms == 0)
4397  && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4398  switch (history->exit_status) {
4399  case PCMK_OCF_OK:
4400  case PCMK_OCF_NOT_RUNNING:
 4401  case PCMK_OCF_RUNNING_PROMOTED:
 4402  case PCMK_OCF_DEGRADED:
 4403  case PCMK_OCF_DEGRADED_PROMOTED:
 4404  // Don't expire probes that return these values
4405  pcmk__rsc_trace(history->rsc,
4406  "Resource history entry %s on %s is not "
4407  "expired: Probe result",
4408  history->id, pcmk__node_name(history->node));
4409  expired = false;
4410  break;
4411  }
4412  }
4413 
4414  return expired;
4415 }
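
The expiry test above reduces to epoch arithmetic: an entry is expired once now >= last_run + failure_timeout, and while an unexpired fail count remains, the scheduler must be re-run just after the failure would expire. A compact sketch with hypothetical timestamps:

#include <stdio.h>
#include <time.h>

int
main(void)
{
    time_t now = 1700000600;      // hypothetical current time
    time_t last_run = 1700000000; // when the failed op was recorded
    int failure_timeout = 300;    // PCMK_META_FAILURE_TIMEOUT, in seconds

    int expired = (now >= (last_run + failure_timeout));

    // The real code schedules the recheck from the *last failure* time
    time_t recheck = last_run + failure_timeout + 1;

    printf("expired=%d recheck_at=%lld\n", expired, (long long) recheck);
    return 0;
}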
4416 
4417 int
4418 pe__target_rc_from_xml(const xmlNode *xml_op)
4419 {
4420  int target_rc = 0;
4421  const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4422 
4423  if (key == NULL) {
4424  return -1;
4425  }
4426  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4427  return target_rc;
4428 }
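
decode_transition_key() extracts the expected exit status from the transition key, a colon-separated tuple of action number, transition number, expected rc, and the DC's UUID. A hedged standalone parser assuming that layout (the key value is made up; the authoritative parsing is decode_transition_key() itself):

#include <stdio.h>

int
main(void)
{
    // Assumed shape: <action-id>:<transition-id>:<target-rc>:<uuid>
    const char *key = "3:15:0:0b5eb4aa-0000-4000-8000-f7bbe5639700";
    int action = 0;
    int transition = 0;
    int target_rc = -1;
    char uuid[37] = "";

    if (sscanf(key, "%d:%d:%d:%36s", &action, &transition, &target_rc,
               uuid) == 4) {
        printf("expected exit status: %d (uuid %s)\n", target_rc, uuid);
    }
    return 0;
}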
4429 
4439 static void
4440 update_resource_state(struct action_history *history, int exit_status,
4441  const xmlNode *last_failure,
4442  enum action_fail_response *on_fail)
4443 {
4444  bool clear_past_failure = false;
4445 
4446  if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4447  || (!pcmk__is_bundled(history->rsc)
4448  && pcmk_xe_mask_probe_failure(history->xml))) {
4449  history->rsc->role = pcmk_role_stopped;
4450 
4451  } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4452  clear_past_failure = true;
4453 
4454  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4455  pcmk__str_none)) {
4456  if ((last_failure != NULL)
4457  && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4458  pcmk__str_none)) {
4459  clear_past_failure = true;
4460  }
4461  if (history->rsc->role < pcmk_role_started) {
4462  set_active(history->rsc);
4463  }
4464 
4465  } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4466  history->rsc->role = pcmk_role_started;
4467  clear_past_failure = true;
4468 
4469  } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4470  history->rsc->role = pcmk_role_stopped;
4471  clear_past_failure = true;
4472 
4473  } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4474  pcmk__str_none)) {
4475  history->rsc->role = pcmk_role_promoted;
4476  clear_past_failure = true;
4477 
4478  } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4479  pcmk__str_none)) {
4480  if (*on_fail == pcmk_on_fail_demote) {
4481  /* Demote clears an error only if
4482  * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4483  */
4484  clear_past_failure = true;
4485  }
4486  history->rsc->role = pcmk_role_unpromoted;
4487 
4488  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4489  pcmk__str_none)) {
4490  history->rsc->role = pcmk_role_started;
4491  clear_past_failure = true;
4492 
4493  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4494  pcmk__str_none)) {
4495  unpack_migrate_to_success(history);
4496 
4497  } else if (history->rsc->role < pcmk_role_started) {
4498  pcmk__rsc_trace(history->rsc, "%s active on %s",
4499  history->rsc->id, pcmk__node_name(history->node));
4500  set_active(history->rsc);
4501  }
4502 
4503  if (!clear_past_failure) {
4504  return;
4505  }
4506 
4507  switch (*on_fail) {
4508  case pcmk_on_fail_stop:
4509  case pcmk_on_fail_ban:
 4510  case pcmk_on_fail_standby_node:
 4511  case pcmk_on_fail_fence_node:
 4512  pcmk__rsc_trace(history->rsc,
4513  "%s (%s) is not cleared by a completed %s",
4514  history->rsc->id, pcmk_on_fail_text(*on_fail),
4515  history->task);
4516  break;
4517 
4518  case pcmk_on_fail_block:
4519  case pcmk_on_fail_ignore:
4520  case pcmk_on_fail_demote:
4521  case pcmk_on_fail_restart:
 4522  case pcmk_on_fail_restart_container:
 4523  *on_fail = pcmk_on_fail_ignore;
4524  pe__set_next_role(history->rsc, pcmk_role_unknown,
4525  "clear past failures");
4526  break;
4527 
 4528  case pcmk_on_fail_reset_remote:
 4529  if (history->rsc->remote_reconnect_ms == 0) {
4530  /* With no reconnect interval, the connection is allowed to
4531  * start again after the remote node is fenced and
4532  * completely stopped. (With a reconnect interval, we wait
4533  * for the failure to be cleared entirely before attempting
4534  * to reconnect.)
4535  */
4536  *on_fail = pcmk_on_fail_ignore;
4537  pe__set_next_role(history->rsc, pcmk_role_unknown,
4538  "clear past failures and reset remote");
4539  }
4540  break;
4541  }
4542 }
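
update_resource_state() is essentially a transition table from a completed action name to the resource's new role. A condensed standalone version of that mapping; the task strings and role enum here are simplified stand-ins for the PCMK_ACTION_* constants and enum rsc_role_e:

#include <stdio.h>
#include <string.h>

enum role { ROLE_STOPPED, ROLE_STARTED, ROLE_UNPROMOTED, ROLE_PROMOTED };

static enum role
role_after(const char *task, enum role current)
{
    if (strcmp(task, "start") == 0)   return ROLE_STARTED;
    if (strcmp(task, "stop") == 0)    return ROLE_STOPPED;
    if (strcmp(task, "promote") == 0) return ROLE_PROMOTED;
    if (strcmp(task, "demote") == 0)  return ROLE_UNPROMOTED;
    return current; // monitors and other actions leave the role alone
}

int
main(void)
{
    enum role role = ROLE_STOPPED;
    const char *ops[] = { "start", "promote", "demote", "stop" };

    for (int i = 0; i < 4; i++) {
        role = role_after(ops[i], role);
        printf("after %-7s role=%d\n", ops[i], (int) role);
    }
    return 0;
}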
4543 
4552 static inline bool
4553 can_affect_state(struct action_history *history)
4554 {
4555 #if 0
4556  /* @COMPAT It might be better to parse only actions we know we're interested
4557  * in, rather than exclude a couple we don't. However that would be a
4558  * behavioral change that should be done at a major or minor series release.
4559  * Currently, unknown operations can affect whether a resource is considered
4560  * active and/or failed.
4561  */
4562  return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4566  "asyncmon", NULL);
4567 #else
4568  return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4569  PCMK_ACTION_META_DATA, NULL);
4570 #endif
4571 }
4572 
4581 static int
4582 unpack_action_result(struct action_history *history)
4583 {
4584  if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4585  &(history->execution_status)) < 0)
4586  || (history->execution_status < PCMK_EXEC_PENDING)
4587  || (history->execution_status > PCMK_EXEC_MAX)
4588  || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4589  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4590  "with invalid " PCMK__XA_OP_STATUS " '%s'",
4591  history->id, history->rsc->id,
4592  pcmk__node_name(history->node),
4593  pcmk__s(crm_element_value(history->xml,
4595  ""));
4596  return pcmk_rc_unpack_error;
4597  }
4598  if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4599  &(history->exit_status)) < 0)
4600  || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4601 #if 0
4602  /* @COMPAT We should ignore malformed entries, but since that would
4603  * change behavior, it should be done at a major or minor series
4604  * release.
4605  */
4606  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4607  "with invalid " PCMK__XA_RC_CODE " '%s'",
4608  history->id, history->rsc->id,
4609  pcmk__node_name(history->node),
4610  pcmk__s(crm_element_value(history->xml,
4612  ""));
4613  return pcmk_rc_unpack_error;
4614 #else
4615  history->exit_status = CRM_EX_ERROR;
4616 #endif
4617  }
4618  history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4619  return pcmk_rc_ok;
4620 }
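
unpack_action_result() refuses to trust either integer until it parses cleanly and falls within the legal range. The same defensive pattern in isolation, using strtol() directly instead of crm_element_value_int():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

// Parse text into *out and confirm it falls within [min, max]
static int
parse_in_range(const char *text, long min, long max, long *out)
{
    char *end = NULL;
    long value = 0;

    errno = 0;
    value = strtol(text, &end, 10);
    if ((errno != 0) || (end == text) || (*end != '\0')
        || (value < min) || (value > max)) {
        return -1; // reject rather than propagate garbage
    }
    *out = value;
    return 0;
}

int
main(void)
{
    long status = 0;

    if (parse_in_range("9999", 0, 9, &status) != 0) {
        printf("invalid op-status: ignoring history entry\n");
    }
    return 0;
}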
4621 
4632 static int
4633 process_expired_result(struct action_history *history, int orig_exit_status)
4634 {
4635  if (!pcmk__is_bundled(history->rsc)
4636  && pcmk_xe_mask_probe_failure(history->xml)
4637  && (orig_exit_status != history->expected_exit_status)) {
4638 
4639  if (history->rsc->role <= pcmk_role_stopped) {
4640  history->rsc->role = pcmk_role_unknown;
4641  }
4642  crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4643  "Masked failure expired",
4644  history->id, history->rsc->id,
4645  pcmk__node_name(history->node));
4646  return pcmk_rc_ok;
4647  }
4648 
4649  if (history->exit_status == history->expected_exit_status) {
4650  return pcmk_rc_undetermined; // Only failures expire
4651  }
4652 
4653  if (history->interval_ms == 0) {
4654  crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4655  "Expired failure",
4656  history->id, history->task, history->rsc->id,
4657  pcmk__node_name(history->node));
4658  return pcmk_rc_ok;
4659  }
4660 
4661  if (history->node->details->online && !history->node->details->unclean) {
4662  /* Reschedule the recurring action. schedule_cancel() won't work at
4663  * this stage, so as a hacky workaround, forcibly change the restart
4664  * digest so pcmk__check_action_config() does what we want later.
4665  *
4666  * @TODO We should skip this if there is a newer successful monitor.
4667  * Also, this causes rescheduling only if the history entry
4668  * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4669  * scheduler regression test doesn't, but that may not be a
4670  * realistic scenario in production).
4671  */
4672  crm_notice("Rescheduling %s-interval %s of %s on %s "
4673  "after failure expired",
4674  pcmk__readable_interval(history->interval_ms), history->task,
4675  history->rsc->id, pcmk__node_name(history->node));
4677  "calculated-failure-timeout");
4678  return pcmk_rc_ok;
4679  }
4680 
4681  return pcmk_rc_undetermined;
4682 }
4683 
4693 static void
4694 mask_probe_failure(struct action_history *history, int orig_exit_status,
4695  const xmlNode *last_failure,
4696  enum action_fail_response *on_fail)
4697 {
4698  pcmk_resource_t *ban_rsc = history->rsc;
4699 
4700  if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4701  ban_rsc = uber_parent(history->rsc);
4702  }
4703 
4704  crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4705  services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4706  pcmk__node_name(history->node));
4707  update_resource_state(history, history->expected_exit_status, last_failure,
4708  on_fail);
4709  crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
4710 
4711  record_failed_op(history);
4712  resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4713  "masked-probe-failure", history->rsc->cluster);
4714 }
4715 
4728 static bool
4729 failure_is_newer(const struct action_history *history,
4730  const xmlNode *last_failure)
4731 {
4732  guint failure_interval_ms = 0U;
4733  long long failure_change = 0LL;
4734  long long this_change = 0LL;
4735 
4736  if (last_failure == NULL) {
4737  return false; // Resource has no last_failure entry
4738  }
4739 
4740  if (!pcmk__str_eq(history->task,
4741  crm_element_value(last_failure, PCMK_XA_OPERATION),
4742  pcmk__str_none)) {
4743  return false; // last_failure is for different action
4744  }
4745 
4746  if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4747  &failure_interval_ms) != pcmk_ok)
4748  || (history->interval_ms != failure_interval_ms)) {
4749  return false; // last_failure is for action with different interval
4750  }
4751 
 4752  if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
 4753  &this_change, 0LL) != pcmk_rc_ok)
 4754  || (pcmk__scan_ll(crm_element_value(last_failure,
 4755  PCMK_XA_LAST_RC_CHANGE),
 4756  &failure_change, 0LL) != pcmk_rc_ok)
4757  || (failure_change < this_change)) {
4758  return false; // Failure is not known to be newer
4759  }
4760 
4761  return true;
4762 }
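
failure_is_newer() boils down to comparing two PCMK_XA_LAST_RC_CHANGE epochs once it has confirmed both entries describe the same action and interval. The comparison in miniature, with made-up timestamps:

#include <stdio.h>
#include <string.h>

// Sketch: a failure entry supersedes this entry only when it is for the
// same action and interval and its timestamp is at least as recent
static int
newer_failure(const char *task_a, const char *task_b,
              unsigned interval_a, unsigned interval_b,
              long long change_a, long long change_b)
{
    return (strcmp(task_a, task_b) == 0)
           && (interval_a == interval_b)
           && (change_b >= change_a);
}

int
main(void)
{
    printf("%d\n", newer_failure("monitor", "monitor", 10000, 10000,
                                 1700000000LL, 1700000300LL)); // prints 1
    return 0;
}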
4763 
4771 static void
4772 process_pending_action(struct action_history *history,
4773  const xmlNode *last_failure)
4774 {
4775  /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4776  * and there might be a RSC_monitor_INTERVAL entry with the last successful
4777  * or pending result.
4778  *
4779  * If last_failure contains the failure of the pending recurring monitor
4780  * we're processing here, and is newer, the action is no longer pending.
4781  * (Pending results have call ID -1, which sorts last, so the last failure
4782  * if any should be known.)
4783  */
4784  if (failure_is_newer(history, last_failure)) {
4785  return;
4786  }
4787 
4788  if (strcmp(history->task, PCMK_ACTION_START) == 0) {
 4789  pcmk__set_rsc_flags(history->rsc, pcmk_rsc_start_pending);
 4790  set_active(history->rsc);
4791 
4792  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4793  history->rsc->role = pcmk_role_promoted;
4794 
4795  } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4796  && history->node->details->unclean) {
 4797  /* A migrate_to action is pending on an unclean source, so force a stop
4798  * on the target.
4799  */
4800  const char *migrate_target = NULL;
4801  pcmk_node_t *target = NULL;
4802 
4803  migrate_target = crm_element_value(history->xml,
 4804  PCMK__META_MIGRATE_TARGET);
 4805  target = pcmk_find_node(history->rsc->cluster, migrate_target);
4806  if (target != NULL) {
4807  stop_action(history->rsc, target, FALSE);
4808  }
4809  }
4810 
4811  if (history->rsc->pending_task != NULL) {
4812  /* There should never be multiple pending actions, but as a failsafe,
4813  * just remember the first one processed for display purposes.
4814  */
4815  return;
4816  }
4817 
4818  if (pcmk_is_probe(history->task, history->interval_ms)) {
4819  /* Pending probes are currently never displayed, even if pending
4820  * operations are requested. If we ever want to change that,
4821  * enable the below and the corresponding part of
4822  * native.c:native_pending_task().
4823  */
4824 #if 0
4825  history->rsc->pending_task = strdup("probe");
4826  history->rsc->pending_node = history->node;
4827 #endif
4828  } else {
4829  history->rsc->pending_task = strdup(history->task);
4830  history->rsc->pending_node = history->node;
4831  }
4832 }
4833 
4834 static void
4835 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4836  xmlNode **last_failure, enum action_fail_response *on_fail)
4837 {
4838  int old_rc = 0;
4839  bool expired = false;
4840  pcmk_resource_t *parent = rsc;
4841  enum rsc_role_e fail_role = pcmk_role_unknown;
4842  enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4843 
4844  struct action_history history = {
4845  .rsc = rsc,
4846  .node = node,
4847  .xml = xml_op,
4848  .execution_status = PCMK_EXEC_UNKNOWN,
4849  };
4850 
4851  CRM_CHECK(rsc && node && xml_op, return);
4852 
4853  history.id = pcmk__xe_id(xml_op);
4854  if (history.id == NULL) {
4855  pcmk__config_err("Ignoring resource history entry for %s on %s "
4856  "without ID", rsc->id, pcmk__node_name(node));
4857  return;
4858  }
4859 
4860  // Task and interval
4861  history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4862  if (history.task == NULL) {
4863  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4864  "without " PCMK_XA_OPERATION,
4865  history.id, rsc->id, pcmk__node_name(node));
4866  return;
4867  }
4868  crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4869  if (!can_affect_state(&history)) {
4870  pcmk__rsc_trace(rsc,
4871  "Ignoring resource history entry %s for %s on %s "
4872  "with irrelevant action '%s'",
4873  history.id, rsc->id, pcmk__node_name(node),
4874  history.task);
4875  return;
4876  }
4877 
4878  if (unpack_action_result(&history) != pcmk_rc_ok) {
4879  return; // Error already logged
4880  }
4881 
4882  history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4883  history.key = pcmk__xe_history_key(xml_op);
4884  crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4885 
4886  pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4887  history.id, history.task, history.call_id,
4888  pcmk__node_name(node),
4889  pcmk_exec_status_str(history.execution_status),
4890  crm_exit_str(history.exit_status));
4891 
4892  if (node->details->unclean) {
4893  pcmk__rsc_trace(rsc,
4894  "%s is running on %s, which is unclean (further action "
4895  "depends on value of stop's on-fail attribute)",
4896  rsc->id, pcmk__node_name(node));
4897  }
4898 
4899  expired = check_operation_expiry(&history);
4900  old_rc = history.exit_status;
4901 
4902  remap_operation(&history, on_fail, expired);
4903 
4904  if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4905  goto done;
4906  }
4907 
4908  if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4909  mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4910  goto done;
4911  }
4912 
4913  if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4914  parent = uber_parent(rsc);
4915  }
4916 
4917  switch (history.execution_status) {
4918  case PCMK_EXEC_PENDING:
4919  process_pending_action(&history, *last_failure);
4920  goto done;
4921 
4922  case PCMK_EXEC_DONE:
4923  update_resource_state(&history, history.exit_status, *last_failure,
4924  on_fail);
4925  goto done;
4926 
 4927  case PCMK_EXEC_NOT_INSTALLED:
 4928  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4929  if (failure_strategy == pcmk_on_fail_ignore) {
4930  crm_warn("Cannot ignore failed %s of %s on %s: "
4931  "Resource agent doesn't exist "
4932  CRM_XS " status=%d rc=%d id=%s",
4933  history.task, rsc->id, pcmk__node_name(node),
4934  history.execution_status, history.exit_status,
4935  history.id);
4936  /* Also for printing it as "FAILED" by marking it as
4937  * pcmk_rsc_failed later
4938  */
4939  *on_fail = pcmk_on_fail_ban;
4940  }
4942  "hard-error", rsc->cluster);
4943  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4944  last_failure, on_fail);
4945  goto done;
4946 
 4947  case PCMK_EXEC_NOT_CONNECTED:
 4948  if (pcmk__is_pacemaker_remote_node(node)
4949  && pcmk_is_set(node->details->remote_rsc->flags,
4950  pcmk_rsc_managed)) {
4951  /* We should never get into a situation where a managed remote
4952  * connection resource is considered OK but a resource action
4953  * behind the connection gets a "not connected" status. But as a
4954  * fail-safe in case a bug or unusual circumstances do lead to
4955  * that, ensure the remote connection is considered failed.
4956  */
 4957  pcmk__set_rsc_flags(node->details->remote_rsc,
 4958  pcmk_rsc_failed|pcmk_rsc_stop_if_removed);
 4959  }
4960  break; // Not done, do error handling
4961 
4962  case PCMK_EXEC_ERROR:
4963  case PCMK_EXEC_ERROR_HARD:
4964  case PCMK_EXEC_ERROR_FATAL:
4965  case PCMK_EXEC_TIMEOUT:
 4966  case PCMK_EXEC_NOT_SUPPORTED:
 4967  case PCMK_EXEC_INVALID:
4968  break; // Not done, do error handling
4969 
4970  default: // No other value should be possible at this point
4971  break;
4972  }
4973 
4974  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4975  if ((failure_strategy == pcmk_on_fail_ignore)
4976  || ((failure_strategy == pcmk_on_fail_restart_container)
4977  && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4978 
4979  char *last_change_s = last_change_str(xml_op);
4980 
4981  crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4982  CRM_XS " %s",
4983  history.task, services_ocf_exitcode_str(history.exit_status),
4984  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4985  pcmk__s(history.exit_reason, ""), rsc->id,
4986  pcmk__node_name(node), last_change_s, history.id);
4987  free(last_change_s);
4988 
4989  update_resource_state(&history, history.expected_exit_status,
4990  *last_failure, on_fail);
4991  crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
 4992  pcmk__set_rsc_flags(rsc, pcmk_rsc_ignore_failure);
 4993 
4994  record_failed_op(&history);
4995 
4996  if ((failure_strategy == pcmk_on_fail_restart_container)
4997  && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4998  *on_fail = failure_strategy;
4999  }
5000 
5001  } else {
5002  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
5003  last_failure, on_fail);
5004 
5005  if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
5006  uint8_t log_level = LOG_ERR;
5007 
5008  if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
5009  log_level = LOG_NOTICE;
5010  }
5011  do_crm_log(log_level,
5012  "Preventing %s from restarting on %s because "
5013  "of hard failure (%s%s%s) " CRM_XS " %s",
5014  parent->id, pcmk__node_name(node),
5015  services_ocf_exitcode_str(history.exit_status),
5016  (pcmk__str_empty(history.exit_reason)? "" : ": "),
5017  pcmk__s(history.exit_reason, ""), history.id);
5019  "hard-error", rsc->cluster);
5020 
5021  } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
5022  pcmk__sched_err("Preventing %s from restarting anywhere because "
5023  "of fatal failure (%s%s%s) " CRM_XS " %s",
5024  parent->id,
5025  services_ocf_exitcode_str(history.exit_status),
5026  (pcmk__str_empty(history.exit_reason)? "" : ": "),
5027  pcmk__s(history.exit_reason, ""), history.id);
5029  "fatal-error", rsc->cluster);
5030  }
5031  }
5032 
5033 done:
5034  pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
5035  rsc->id, pcmk__node_name(node), history.id,
5036  pcmk_role_text(rsc->role),
5037  pcmk_role_text(rsc->next_role));
5038 }
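
Taken as a whole, unpack_rsc_op() is a dispatcher: pending and successful entries update state directly, and everything else funnels into the on-fail machinery. A skeletal model of that control flow, with a toy status enum in place of enum pcmk_exec_status:

#include <stdio.h>

enum status { ST_PENDING, ST_DONE, ST_ERROR };

static void
handle(enum status status)
{
    switch (status) {
        case ST_PENDING:
            printf("record pending action\n");
            return;
        case ST_DONE:
            printf("update resource state\n");
            return;
        default:
            break; // fall through to failure handling
    }
    printf("apply on-fail policy\n");
}

int
main(void)
{
    handle(ST_PENDING);
    handle(ST_DONE);
    handle(ST_ERROR);
    return 0;
}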
5039 
5040 static void
5041 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
 5042  pcmk_scheduler_t *scheduler)
 5043 {
5044  const char *cluster_name = NULL;
5045 
5046  pe_rule_eval_data_t rule_data = {
5047  .node_hash = NULL,
5048  .now = scheduler->now,
5049  .match_data = NULL,
5050  .rsc_data = NULL,
5051  .op_data = NULL
5052  };
5053 
5055  CRM_ATTR_UNAME, node->details->uname);
5056 
5058  if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
5059  scheduler->dc_node = node;
5060  node->details->is_dc = TRUE;
5063  } else {
5066  }
5067 
5068  cluster_name = g_hash_table_lookup(scheduler->config_hash,
 5069  PCMK_OPT_CLUSTER_NAME);
 5070  if (cluster_name) {
 5071  pcmk__insert_dup(node->details->attrs, CRM_ATTR_CLUSTER_NAME,
 5072  cluster_name);
5073  }
5074 
5076  node->details->attrs, NULL, overwrite,
5077  scheduler);
5078 
5079  pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
5080  node->details->utilization, NULL,
5081  FALSE, scheduler);
5082 
5083  if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
5084  pcmk__rsc_node_current) == NULL) {
5085  const char *site_name = pcmk__node_attr(node, "site-name", NULL,
 5086  pcmk__rsc_node_current);
 5087 
5088  if (site_name) {
 5089  pcmk__insert_dup(node->details->attrs,
 5090  CRM_ATTR_SITE_NAME, site_name);
5091 
5092  } else if (cluster_name) {
5093  /* Default to cluster-name if unset */
 5094  pcmk__insert_dup(node->details->attrs,
 5095  CRM_ATTR_SITE_NAME, cluster_name);
5096  }
5097  }
5098 }
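
add_node_attrs() seeds each node's attribute table with built-ins (#uname, #id, #is_dc, and possibly #site-name) before evaluating configured attribute sets. A small GLib sketch of the same duplicate-and-insert pattern; the attribute values here are made up:

#include <glib.h>
#include <stdio.h>

int
main(void)
{
    // Keys and values are duplicated so the table owns its own memory
    GHashTable *attrs = g_hash_table_new_full(g_str_hash, g_str_equal,
                                              g_free, g_free);

    g_hash_table_insert(attrs, g_strdup("#uname"), g_strdup("node1"));
    g_hash_table_insert(attrs, g_strdup("#kind"), g_strdup("cluster"));

    printf("#uname = %s\n",
           (const char *) g_hash_table_lookup(attrs, "#uname"));
    g_hash_table_destroy(attrs);
    return 0;
}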
5099 
5100 static GList *
5101 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
5102 {
5103  int counter = -1;
5104  int stop_index = -1;
5105  int start_index = -1;
5106 
5107  xmlNode *rsc_op = NULL;
5108 
5109  GList *gIter = NULL;
5110  GList *op_list = NULL;
5111  GList *sorted_op_list = NULL;
5112 
5113  /* extract operations */
5114  op_list = NULL;
5115  sorted_op_list = NULL;
5116 
5117  for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
5118  rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
5119 
5120  if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
5121  crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
5122  crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
5123  op_list = g_list_prepend(op_list, rsc_op);
5124  }
5125  }
5126 
5127  if (op_list == NULL) {
5128  /* if there are no operations, there is nothing to do */
5129  return NULL;
5130  }
5131 
5132  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5133 
5134  /* create active recurring operations as optional */
5135  if (active_filter == FALSE) {
5136  return sorted_op_list;
5137  }
5138 
5139  op_list = NULL;
5140 
5141  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5142 
5143  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5144  xmlNode *rsc_op = (xmlNode *) gIter->data;
5145 
5146  counter++;
5147 
5148  if (start_index < stop_index) {
5149  crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5150  break;
5151 
5152  } else if (counter < start_index) {
5153  crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5154  continue;
5155  }
5156  op_list = g_list_append(op_list, rsc_op);
5157  }
5158 
5159  g_list_free(sorted_op_list);
5160  return op_list;
5161 }
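
extract_operations() depends on the call-ID sort: once ops are ordered, everything from the most recent start onward is "active", and a later stop discards the lot. A simplified model of that start/stop index logic with a hypothetical history:

#include <stdio.h>
#include <string.h>

int
main(void)
{
    // Call-ID-sorted history for one resource (hypothetical)
    const char *ops[] = { "start", "monitor", "stop", "start", "monitor" };
    int n = 5;
    int start_index = -1;
    int stop_index = -1;

    for (int i = 0; i < n; i++) {
        if (strcmp(ops[i], "start") == 0) {
            start_index = i;
        } else if (strcmp(ops[i], "stop") == 0) {
            stop_index = i;
        }
    }

    if (start_index > stop_index) { // resource is (or should be) active
        for (int i = start_index; i < n; i++) {
            printf("active op: %s\n", ops[i]);
        }
    }
    return 0;
}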
5162 
5163 GList *
5164 find_operations(const char *rsc, const char *node, gboolean active_filter,
 5165  pcmk_scheduler_t *scheduler)
 5166 {
5167  GList *output = NULL;
5168  GList *intermediate = NULL;
5169 
5170  xmlNode *tmp = NULL;
5171  xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5172  NULL, NULL);
5173 
5174  pcmk_node_t *this_node = NULL;
5175 
5176  xmlNode *node_state = NULL;
5177 
5178  CRM_CHECK(status != NULL, return NULL);
5179 
5180  for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
5181  node_state != NULL; node_state = pcmk__xe_next(node_state)) {
5182 
5183  if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
5184  const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5185 
5186  if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5187  continue;
5188  }
5189 
5190  this_node = pcmk_find_node(scheduler, uname);
5191  if(this_node == NULL) {
5192  CRM_LOG_ASSERT(this_node != NULL);
5193  continue;
5194 
5195  } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5196  determine_remote_online_status(scheduler, this_node);
5197 
5198  } else {
5199  determine_online_status(node_state, this_node, scheduler);
5200  }
5201 
5202  if (this_node->details->online
 5203  || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
 5204  /* offline nodes run no resources...
5205  * unless stonith is enabled in which case we need to
5206  * make sure rsc start events happen after the stonith
5207  */
5208  xmlNode *lrm_rsc = NULL;
5209 
5210  tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5211  NULL);
 5212  tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
 5213  NULL);
5214 
5215  for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
5216  lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5217 
5218  if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5219  const char *rsc_id = crm_element_value(lrm_rsc,
5220  PCMK_XA_ID);
5221 
5222  if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5223  continue;
5224  }
5225 
5226  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5227  output = g_list_concat(output, intermediate);
5228  }
5229  }
5230  }
5231  }
5232  }
5233 
5234  return output;
5235 }
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:1071
GHashTable * tags
Definition: scheduler.h:253
Services API.
pcmk__cpg_host_t host
Definition: cpg.c:52
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:245
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
Definition: utils.c:36
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
Definition: scheduler.c:103
xmlNode * pcmk__xml_copy(xmlNode *parent, xmlNode *src)
Definition: xml.c:974
enum pe_quorum_policy no_quorum_policy
Definition: scheduler.h:217
bool pe__shutdown_requested(const pcmk_node_t *node)
Definition: utils.c:677
A dumping ground.
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
Definition: utils.c:517
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition: utils.c:89
Service failed and possibly in promoted role.
Definition: results.h:189
#define crm_notice(fmt, args...)
Definition: logging.h:397
GHashTable * known_on
Definition: resources.h:459
#define PCMK__XE_LRM_RESOURCES
No connection to executor.
Definition: results.h:337
pcmk_scheduler_t * cluster
Definition: resources.h:408
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
Definition: unpack.c:638
GHashTable * attrs
Definition: nodes.h:143
#define PCMK_XA_NAME
Definition: xml_names.h:330
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition: roles.c:23
gboolean unseen
Definition: nodes.h:94
gboolean fixed
Definition: nodes.h:164
char data[0]
Definition: cpg.c:58
#define PCMK_OPT_STONITH_ENABLED
Definition: options.h:65
#define PCMK__XE_TICKET_STATE
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Definition: resources.h:328
Service active and promoted.
Definition: results.h:188
#define CRM_ATTR_KIND
Definition: crm.h:101
#define ST__LEVEL_MIN
Definition: crm_internal.h:88
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
Definition: clone.c:1283
pcmk_node_t * partial_migration_target
Definition: resources.h:450
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags)
Definition: xml.c:669
#define PCMK_VALUE_FALSE
Definition: options.h:152
#define PCMK__XA_RC_CODE
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
Definition: bundle.c:1402
GHashTable * state
Definition: tickets.h:35
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition: strings.c:126
#define PCMK_XE_STATUS
Definition: xml_names.h:204
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:1046
#define CRM_ATTR_IS_DC
Definition: crm.h:103
#define stop_action(rsc, node, optional)
Definition: internal.h:214
#define PCMK_XE_TEMPLATE
Definition: xml_names.h:211
Stopped.
Definition: roles.h:36
const char * name
Definition: cib.c:26
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1038
#define PCMK_OPT_CONCURRENT_FENCING
Definition: options.h:33
#define XPATH_NODE_STATE
Definition: unpack.c:2960
#define PCMK_XE_PRIMITIVE
Definition: xml_names.h:164
enum rsc_role_e role
Definition: resources.h:464
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition: utils.c:145
#define pcmk__config_warn(fmt...)
GList * children
Definition: resources.h:471
#define pcmk__rsc_trace(rsc, fmt, args...)
pcmk__op_digest_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Definition: pe_digest.c:394
Match only clones and their instances, by either clone or instance ID.
Definition: resources.h:191
gboolean standby
Definition: tickets.h:34
#define PCMK_XA_RESOURCE_DISCOVERY
Definition: xml_names.h:384
int priority_fencing_delay
Definition: scheduler.h:261
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id)
Definition: status.c:435
#define pcmk__rsc_info(rsc, fmt, args...)
#define PCMK_OPT_SHUTDOWN_LOCK
Definition: options.h:60
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
Definition: clone.c:247
enum rsc_role_e next_role
Definition: resources.h:465
bool pcmk_is_probe(const char *task, guint interval)
Check whether an action name and interval represent a probe.
Definition: probes.c:30
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
Definition: utils.c:410
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
Definition: pe_actions.c:1648
#define pcmk__config_err(fmt...)
#define PCMK_ACTION_META_DATA
Definition: actions.h:56
#define PCMK_ACTION_MONITOR
Definition: actions.h:60
#define PCMK_XA_EXIT_REASON
Definition: xml_names.h:274
GHashTable * meta
Definition: resources.h:467
#define PCMK_XA_NO_QUORUM_PANIC
Definition: xml_names.h:333
Service safely stopped.
Definition: results.h:186
#define set_config_flag(scheduler, option, flag)
Definition: unpack.c:51
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1281
#define PCMK__XA_RSC_ID
Unspecified error.
Definition: results.h:252
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:313
#define PCMK_ACTION_MIGRATE_TO
Definition: actions.h:59
#define PCMK_XA_INDEX
Definition: xml_names.h:305
gboolean pending
Definition: nodes.h:88
#define SUB_XPATH_LRM_RSC_OP
Definition: unpack.c:2965
Promoted.
Definition: roles.h:39
char * id
Definition: tags.h:30
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: pe_actions.c:1784
#define PCMK_OPT_CLUSTER_NAME
Definition: options.h:31
Necessary CIB secrets are unavailable.
Definition: results.h:340
const char * pcmk_on_fail_text(enum action_fail_response on_fail)
Get string equivalent of a failure handling type.
Definition: actions.c:147
action_fail_response
Definition: actions.h:130
#define PCMK__XA_OP_RESTART_DIGEST
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:228
Service promoted but more likely to fail soon.
Definition: results.h:191
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:470
enum crm_ais_msg_types type
Definition: cpg.c:51
#define CRMD_JOINSTATE_NACK
Definition: crm.h:146
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:104
Ensure crm_exit_t can hold this.
Definition: results.h:316
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:112
GHashTable * tickets
Definition: scheduler.h:222
void pcmk__validate_cluster_options(GHashTable *options)
Definition: options.c:1565
const char * pcmk__cluster_option(GHashTable *options, const char *name)
Definition: options.c:1419
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1553
Action did not complete in time.
Definition: results.h:331
const char * pcmk_rc_str(int rc)
Get a user-friendly description of a return code.
Definition: results.c:503
#define PCMK_NODE_ATTR_MAINTENANCE
Definition: nodes.h:31
#define PCMK_OPT_STOP_ORPHAN_ACTIONS
Definition: options.h:70
pcmk_scheduler_t * data_set
Definition: nodes.h:154
pcmk_resource_t * container
Definition: resources.h:476
gboolean remote_was_fenced
Definition: nodes.h:119
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition: nvpair.c:494
Execution failed, do not retry on node.
Definition: results.h:334
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:620
#define PCMK_XA_TYPE
Definition: xml_names.h:430
#define PCMK_META_REMOTE_CONNECT_TIMEOUT
Definition: options.h:107
#define PCMK_OPT_STONITH_ACTION
Definition: options.h:64
#define PCMK_XA_RESOURCE
Definition: xml_names.h:382
#define PCMK_XA_OPERATION
Definition: xml_names.h:349
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
Definition: complex.c:691
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:189
gboolean remote_requires_reset
Definition: nodes.h:113
Action was cancelled.
Definition: results.h:330
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition: unpack.c:871
#define PCMK_XA_STANDBY
Definition: xml_names.h:406
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
Definition: failcounts.c:373
No fence device is configured for target.
Definition: results.h:339
const char * action
Definition: pcmk_fence.c:30
#define PCMK_OPT_ENABLE_STARTUP_PROBES
Definition: options.h:38
#define PCMK_META_REMOTE_ALLOW_MIGRATE
Definition: options.h:106
#define PCMK_META_IS_MANAGED
Definition: options.h:92
GList * resources
Definition: scheduler.h:231
#define PCMK__XE_TRANSIENT_ATTRIBUTES
int pcmk__effective_rc(int rc)
Definition: agents.c:71
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Check whether an action history entry represents a probe.
Definition: probes.c:45
#define PCMK__META_MIGRATE_TARGET
gboolean remote_maintenance
Definition: nodes.h:125
#define PCMK_META_REMOTE_ADDR
Definition: options.h:105
#define pcmk__rsc_debug(rsc, fmt, args...)
#define demote_action(rsc, node, optional)
Definition: internal.h:230
gboolean is_dc
Definition: nodes.h:101
char int pcmk_parse_interval_spec(const char *input, guint *result_ms)
Parse milliseconds from a Pacemaker interval specification.
Definition: strings.c:462
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Definition: resources.h:276
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition: actions.c:425
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:125
#define PCMK_OPT_PLACEMENT_STRATEGY
Definition: options.h:57
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
int weight
Definition: nodes.h:163
#define PCMK__XE_LRM_RESOURCE
#define PCMK__XA_TRANSITION_KEY
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition: strings.c:96
pcmk_resource_t * parent
Definition: resources.h:409
GList * dangling_migrations
Definition: resources.h:474
#define CRMD_JOINSTATE_DOWN
Definition: crm.h:143
Maximum value for this enum.
Definition: results.h:343
#define crm_warn(fmt, args...)
Definition: logging.h:394
guint remote_reconnect_ms
Definition: resources.h:423
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition: complex.c:1292
#define PCMK_XE_TAG
Definition: xml_names.h:208
const char * crm_exit_str(crm_exit_t exit_code)
Definition: results.c:642
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:2010
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:614
#define PCMK_VALUE_FENCE
Definition: options.h:153
#define crm_debug(fmt, args...)
Definition: logging.h:402
#define PCMK_XA_UNAME
Definition: xml_names.h:431
#define PCMK_XA_EXPECTED
Definition: xml_names.h:278
Utility functions.
Used only to initialize variables.
Definition: results.h:327
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:458
#define PCMK_OPT_STOP_ALL_RESOURCES
Definition: options.h:69
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1948
xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v)
Definition: xml.c:481
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition: utils.c:359
Parameter invalid (in local context)
Definition: results.h:179
gboolean unpacked
Definition: nodes.h:127
#define pcmk__sched_err(fmt...)
#define PCMK_XE_UTILIZATION
Definition: xml_names.h:217
Parameter invalid (inherently)
Definition: results.h:183
#define CRM_ATTR_UNAME
Definition: crm.h:99
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Check whether an action history entry represents a maskable probe.
Definition: probes.c:69
#define crm_trace(fmt, args...)
Definition: logging.h:404
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:145
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:181
#define PCMK__VALUE_PING
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1308
bool xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:47
#define PCMK_VALUE_MEMBER
Definition: options.h:169
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:94
Insufficient privileges.
Definition: results.h:181
#define PCMK_OPT_MAINTENANCE_MODE
Definition: options.h:44
#define PCMK_META_REMOTE_NODE
Definition: options.h:108
#define pcmk__clear_scheduler_flags(scheduler, flags_to_clear)
const char * stonith_action
Definition: scheduler.h:205
struct pe_node_shared_s * details
Definition: nodes.h:168
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
Definition: bundle.c:920
#define PCMK_OPT_SHUTDOWN_LOCK_LIMIT
Definition: options.h:61
#define crm_log_xml_debug(xml, text)
Definition: logging.h:411
#define PCMK_XE_CLUSTER_PROPERTY_SET
Definition: xml_names.h:84
#define PCMK_ACTION_START
Definition: actions.h:72
unsigned long long flags
Definition: resources.h:428
const char * uname
Definition: nodes.h:74
#define PCMK_VALUE_IGNORE
Definition: options.h:161
Unpromoted.
Definition: roles.h:38
#define PCMK_OPT_PRIORITY_FENCING_DELAY
Definition: options.h:58
void pcmk__str_update(char **str, const char *value)
Definition: strings.c:1289
Wrappers for and extensions to libxml2.
GHashTable * config_hash
Definition: scheduler.h:219
rsc_role_e
Definition: roles.h:34
#define PCMK_OPT_STARTUP_FENCING
Definition: options.h:63
char * clone_name
Definition: resources.h:397
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:627
#define PCMK_META_TARGET_ROLE
Definition: options.h:113
#define ST__LEVEL_MAX
Definition: crm_internal.h:89
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Definition: utils.c:719
time_t lock_time
Definition: resources.h:483
Action completed, result is known.
Definition: results.h:329
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition: nvpair.c:651
#define PCMK_ACTION_STOP
Definition: actions.h:75
#define PCMK__XA_JOIN
Flag has no effect.
Definition: xml_internal.h:440
GHashTable * pe__node_list2table(const GList *list)
Definition: utils.c:115
#define PCMK_NODE_ATTR_TERMINATE
Definition: nodes.h:33
time_t last_granted
Definition: tickets.h:33
#define PCMK_VALUE_TRUE
Definition: options.h:215
#define PCMK_XA_ID
Definition: xml_names.h:301
#define PCMK__ACTION_POWEROFF
#define pcmk__set_rsc_flags(resource, flags_to_set)
Execution failed, do not retry anywhere.
Definition: results.h:335
gboolean standby
Definition: nodes.h:83
#define PCMK__XE_LRM
#define PCMK_NODE_ATTR_STANDBY
Definition: nodes.h:32
#define PCMK_XA_VALUE
Definition: xml_names.h:442
#define PCMK_XA_SCORE
Definition: xml_names.h:396
void pe__free_digests(gpointer ptr)
Definition: pe_digest.c:33
gboolean expected_up
Definition: nodes.h:100
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:196
#define PCMK_OPT_NODE_PENDING_TIMEOUT
Definition: options.h:53
Dependencies not available locally.
Definition: results.h:182
#define PCMK_OPT_START_FAILURE_IS_FATAL
Definition: options.h:62
#define PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
enum pe_obj_types variant
Definition: resources.h:410
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1062
xmlNode * input
Definition: scheduler.h:196
gboolean granted
Definition: tickets.h:32
#define pcmk__str_copy(str)
#define pcmk__warn_once(wo_flag, fmt...)
#define PCMK_XE_TICKETS
Definition: xml_names.h:213
const char * placement_strategy
Definition: scheduler.h:206
gboolean order_actions(pcmk_action_t *first, pcmk_action_t *then, uint32_t flags)
Definition: utils.c:457
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition: utils.c:694
uint32_t id
Definition: cpg.c:48
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
Definition: unpack.c:214
#define PCMK_VALUE_FENCE_LEGACY
Definition: options.h:224
const char * id
Definition: nodes.h:73
char * id
Definition: tickets.h:31
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
Definition: native.c:91
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
Definition: status.c:470
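Sketch of a lookup by XML ID with a fall-back to node name (scheduler, id, and uname are assumed locals):
    pcmk_node_t *match = pe_find_node_any(scheduler->nodes, id, uname);
    if (match == NULL) {
        crm_trace("No node entry found for %s", uname);
    }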
#define PCMK_XE_META_ATTRIBUTES
Definition: xml_names.h:130
guint shutdown_lock
Definition: scheduler.h:260
Unspecified error.
Definition: results.h:177
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
Definition: pe_actions.c:1129
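Sketch of scheduling a mandatory stop (rsc, node, and scheduler are assumed locals; that custom_action() takes ownership of the key is an assumption consistent with its non-const key parameter):
    pcmk_action_t *stop = custom_action(rsc,
                                        pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
                                        PCMK_ACTION_STOP, node, FALSE, scheduler);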
GList * refs
Definition: tags.h:31
#define pcmk__assert(expr)
const char * target
Definition: pcmk_fence.c:29
GList * fillers
Definition: resources.h:477
GList * running_rsc
Definition: nodes.h:140
gboolean rsc_discovery_enabled
Definition: nodes.h:107
#define CRM_XS
Definition: logging.h:56
#define PCMK_VALUE_ONLINE
Definition: options.h:184
Requested action not implemented.
Definition: results.h:180
#define PCMK_OPT_STONITH_TIMEOUT
Definition: options.h:67
int crm_str_to_boolean(const char *s, int *ret)
Definition: strings.c:508
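Unlike crm_is_true(), this can distinguish an unparsable value from an explicit false; sketch (value is an assumed string, and a positive return on a successful parse is an assumption):
    int enabled = 0;
    if (crm_str_to_boolean(value, &enabled) > 0) {
        crm_trace("Parsed %s as %s", value, enabled? "true" : "false");
    }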
const char * localhost
Definition: scheduler.h:251
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
Definition: xpath.c:139
int pe__target_rc_from_xml(const xmlNode *xml_op)
Definition: unpack.c:4418
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Definition: pe_actions.c:132
Service active but more likely to fail soon.
Definition: results.h:190
#define PCMK_XE_NODE
Definition: xml_names.h:136
gboolean is_remote_node
Definition: resources.h:431
#define PCMK_META_INTERVAL
Definition: options.h:91
#define PCMK_XA_LAST_RC_CHANGE
Definition: xml_names.h:316
Agent does not implement requested action.
Definition: results.h:332
#define PCMK_XE_FENCING_LEVEL
Definition: xml_names.h:117
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:695
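Sketch of a table with heap-allocated string keys and values, paired with pcmk__insert_dup() (listed further down in this index), which inserts copies of both strings:
    GHashTable *attrs = pcmk__strkey_table(free, free);
    pcmk__insert_dup(attrs, "site", "paris"); /* table owns the copies */
    g_hash_table_destroy(attrs);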
pcmk__action_result_t result
Definition: pcmk_fence.c:35
#define PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS
Definition: options.h:137
pcmk_rsc_methods_t * fns
Definition: resources.h:412
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
Definition: unpack.c:973
guint node_pending_timeout
Definition: scheduler.h:266
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:295
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition: unpack.c:727
#define PCMK_OPT_SYMMETRIC_CLUSTER
Definition: options.h:72
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
Definition: pe_health.c:24
pcmk_scheduler_t * scheduler
#define PCMK__XE_LRM_RSC_OP
pcmk_node_t * lock_node
Definition: resources.h:481
Success.
Definition: results.h:174
GHashTable * node_hash
Definition: common.h:46
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
Definition: iso8601.c:2147
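Sketch for logging a timestamp (passing 0 for flags to get the default format is an assumption; the caller frees the result):
    time_t t = time(NULL);
    char *s = pcmk__epoch2str(&t, 0);
    crm_info("As of %s", s);
    free(s);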
#define PCMK_XA_CRMD
Definition: xml_names.h:256
#define PCMK_META_REMOTE_PORT
Definition: options.h:109
Action is pending.
Definition: results.h:199
#define PCMK_OPT_STOP_ORPHAN_RESOURCES
Definition: options.h:71
int pcmk__xe_get_score(const xmlNode *xml, const char *name, int *score, int default_score)
Definition: xml.c:549
#define PCMK_ACTION_MIGRATE_FROM
Definition: actions.h:58
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:118
#define PCMK__XA_OP_STATUS
#define pcmk__sched_warn(fmt...)
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, int score, pcmk_scheduler_t *scheduler)
Definition: unpack.c:470
#define PCMK__XA_GRANTED
#define PCMK_META_ON_FAIL
Definition: options.h:98
Started.
Definition: roles.h:37
#define PCMK_XE_OBJ_REF
Definition: xml_names.h:145
Definition: tags.h:29
#define crm_log_xml_info(xml, text)
Definition: logging.h:410
#define PCMK__XA_IN_CCM
GHashTable * utilization
Definition: nodes.h:144
gboolean shutdown
Definition: nodes.h:98
char uname[MAX_NAME]
Definition: cpg.c:53
#define PCMK_ACTION_PROMOTE
Definition: actions.h:66
#define PCMK_OPT_NO_QUORUM_POLICY
Definition: options.h:46
#define PCMK_OPT_HAVE_WATCHDOG
Definition: options.h:40
#define PCMK_XE_GROUP
Definition: xml_names.h:119
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:144
#define PCMK__XE_NODE_STATE
#define PCMK_XA_LAST_GRANTED
Definition: xml_names.h:315
GList * running_on
Definition: resources.h:456
CRM_TRACE_INIT_DATA(pe_status)
Agent or dependency not available locally.
Definition: results.h:336
gboolean maintenance
Definition: nodes.h:105
#define pcmk_ok
Definition: results.h:65
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
Definition: pe_actions.c:1630
#define PCMK__XA_CALL_ID
GHashTable * digest_cache
Definition: nodes.h:145
#define pcmk__set_action_flags(action, flags_to_set)
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2657
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
Definition: unpack.c:5164
#define PCMK_VALUE_STOP
Definition: options.h:209
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
Definition: unpack.c:1437
Action is in progress.
Definition: results.h:328
#define PCMK__OPT_REMOVE_AFTER_STOP
void destroy_ticket(gpointer data)
Definition: utils.c:505
void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler)
Definition: unpack.c:946
const char * pcmk__readable_interval(guint interval_ms)
Definition: iso8601.c:2206
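Sketch (the exact rendering, e.g. "2m30s" for 150000ms, is an assumption; the const return suggests a shared buffer, so use the result immediately rather than freeing it):
    crm_trace("Checking again in %s", pcmk__readable_interval(150000));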
pcmk_node_t * pending_node
Definition: resources.h:480
#define SUB_XPATH_LRM_RESOURCE
Definition: unpack.c:2962
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
Definition: pe_actions.c:890
gboolean crm_is_true(const char *s)
Definition: strings.c:500
#define PCMK__META_CONTAINER
#define CRM_ATTR_SITE_NAME
Definition: crm.h:105
#define PCMK_ACTION_OFF
Definition: actions.h:63
Resource role is unknown.
Definition: roles.h:35
#define PCMK__META_MIGRATE_SOURCE
#define PCMK_VALUE_FREEZE
Definition: options.h:155
unsigned long long flags
Definition: scheduler.h:211
const char * parent
Definition: cib.c:27
Action cannot be attempted (e.g. shutdown)
Definition: results.h:338
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
Definition: pe_actions.c:1031
gboolean standby_onfail
Definition: nodes.h:84
xmlNode * pcmk__xe_create(xmlNode *parent, const char *name)
Definition: xml.c:770
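Sketch of building a child element, using the public crm_xml_add() to set an attribute (parent is an assumed xmlNode; the node ID is illustrative):
    xmlNode *child = pcmk__xe_create(parent, PCMK_XE_NODE);
    crm_xml_add(child, PCMK_XA_ID, "node1");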
#define pcmk__assert_alloc(nmemb, size)
Definition: internal.h:297
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition: utils.c:395
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:39
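This pairs with xpath_search() earlier in this index; sketch (the XPath expression is illustrative, and freeXpathObject() tolerating a NULL result is an assumption):
    xmlXPathObjectPtr matches = xpath_search(scheduler->input, "//" PCMK__XE_NODE_STATE);
    freeXpathObject(matches);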
#define PCMK_VALUE_OFFLINE
Definition: options.h:183
#define PCMK_XE_INSTANCE_ATTRIBUTES
Definition: xml_names.h:122
#define CRM_ATTR_ID
Definition: crm.h:100
gboolean unclean
Definition: nodes.h:92
unsigned int timeout
Definition: pcmk_fence.c:32
xmlNode * pcmk__xe_next_same(const xmlNode *node)
Definition: xml.c:2130
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
Definition: remote.c:189
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:193
enum node_type type
Definition: nodes.h:75
#define PCMK_VALUE_REMOTE
Definition: options.h:198
crm_time_t * now
Definition: scheduler.h:198
Execution failed, may be retried.
Definition: results.h:333
#define crm_info(fmt, args...)
Definition: logging.h:399
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
Definition: strings.c:713
#define pcmk__set_scheduler_flags(scheduler, flags_to_set)
GHashTable * template_rsc_sets
Definition: scheduler.h:248
#define PCMK_VALUE_DEMOTE
Definition: options.h:145
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
Definition: pe_actions.c:702
pcmk_node_t * dc_node
Definition: scheduler.h:203
#define PCMK__XA_NODE_FENCED
gboolean online
Definition: nodes.h:81
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1988
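Sketch; strips a clone instance suffix such as ":2" from a resource ID (the caller frees the result):
    char *base = clone_strip("galera:2"); /* "galera" */
    crm_trace("Base resource name: %s", base);
    free(base);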
GList * stop_needed
Definition: scheduler.h:257
#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
Definition: options.h:68
enum pcmk__digest_result rc
#define PCMK__XA_NODE_IN_MAINTENANCE
pcmk_resource_t * remote_rsc
Definition: nodes.h:136
pcmk_node_t * partial_migration_source
Definition: resources.h:453
#define PCMK_ACTION_NOTIFY
Definition: actions.h:62
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
Definition: scores.h:24
GHashTable * allowed_nodes
Definition: resources.h:462
Where resource is running.