pacemaker  3.0.0-d8340737c4
Scalable High-Availability cluster resource manager
unpack.c
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 #include <time.h>
16 
17 #include <crm/crm.h>
18 #include <crm/services.h>
19 #include <crm/common/xml.h>
21 
22 #include <crm/common/util.h>
23 #include <crm/pengine/rules.h>
24 #include <crm/pengine/internal.h>
25 #include <pe_status_private.h>
26 
27 CRM_TRACE_INIT_DATA(pe_status);
28 
29 // A (parsed) resource action history entry
30 struct action_history {
31  pcmk_resource_t *rsc; // Resource that history is for
32  pcmk_node_t *node; // Node that history is for
33  xmlNode *xml; // History entry XML
34 
35  // Parsed from entry XML
36  const char *id; // XML ID of history entry
37  const char *key; // Operation key of action
38  const char *task; // Action name
39  const char *exit_reason; // Exit reason given for result
40  guint interval_ms; // Action interval
41  int call_id; // Call ID of action
42  int expected_exit_status; // Expected exit status of action
43  int exit_status; // Actual exit status of action
44  int execution_status; // Execution status of action
45 };
46 
47 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
48  * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the
49  * flag is stringified more readably in log messages.
50  */
51 #define set_config_flag(scheduler, option, flag) do { \
52  GHashTable *config_hash = (scheduler)->priv->options; \
53  const char *scf_value = pcmk__cluster_option(config_hash, (option)); \
54  \
55  if (scf_value != NULL) { \
56  if (crm_is_true(scf_value)) { \
57  (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
58  LOG_TRACE, "Scheduler", \
59  crm_system_name, (scheduler)->flags, \
60  (flag), #flag); \
61  } else { \
62  (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
63  LOG_TRACE, "Scheduler", \
64  crm_system_name, (scheduler)->flags, \
65  (flag), #flag); \
66  } \
67  } \
68  } while(0)
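/* Illustrative use (one plausible pairing, shown for explanation only):
 * unpack_config() below invokes this macro once per boolean cluster option,
 * for example
 *
 *     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
 *                     pcmk__sched_in_maintenance);
 *
 * which looks the option up in scheduler->priv->options and sets or clears the
 * corresponding scheduler flag, logging the change with the flag's name.
 */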
69 
70 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
71  xmlNode *xml_op, xmlNode **last_failure,
72  enum pcmk__on_fail *failed);
73 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
74  pcmk_node_t *this_node);
75 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
76  bool overwrite, pcmk_scheduler_t *scheduler);
77 static void determine_online_status(const xmlNode *node_state,
78  pcmk_node_t *this_node,
79  pcmk_scheduler_t *scheduler);
80 
81 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
82  pcmk_scheduler_t *scheduler);
83 
84 
94 static bool
95 is_dangling_guest_node(pcmk_node_t *node)
96 {
97  return pcmk__is_pacemaker_remote_node(node)
98  && (node->priv->remote != NULL)
99  && (node->priv->remote->priv->launcher == NULL)
100  && pcmk_is_set(node->priv->remote->flags,
102 }
103 
113 void
114 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
115  const char *reason, bool priority_delay)
116 {
117  CRM_CHECK(node, return);
118 
119  if (pcmk__is_guest_or_bundle_node(node)) {
120  // Fence a guest or bundle node by marking its launcher as failed
121  pcmk_resource_t *rsc = node->priv->remote->priv->launcher;
122 
123  if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
124  if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
125  crm_notice("Not fencing guest node %s "
126  "(otherwise would because %s): "
127  "its guest resource %s is unmanaged",
128  pcmk__node_name(node), reason, rsc->id);
129  } else {
130  pcmk__sched_warn(scheduler,
131  "Guest node %s will be fenced "
132  "(by recovering its guest resource %s): %s",
133  pcmk__node_name(node), rsc->id, reason);
134 
135  /* We don't mark the node as unclean because that would prevent the
136  * node from running resources. We want to allow it to run resources
137  * in this transition if the recovery succeeds.
138  */
142  }
143  }
144 
145  } else if (is_dangling_guest_node(node)) {
146  crm_info("Cleaning up dangling connection for guest node %s: "
147  "fencing was already done because %s, "
148  "and guest resource no longer exists",
149  pcmk__node_name(node), reason);
152 
153  } else if (pcmk__is_remote_node(node)) {
154  pcmk_resource_t *rsc = node->priv->remote;
155 
156  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
157  crm_notice("Not fencing remote node %s "
158  "(otherwise would because %s): connection is unmanaged",
159  pcmk__node_name(node), reason);
160  } else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) {
162  pcmk__sched_warn(scheduler, "Remote node %s %s: %s",
163  pcmk__node_name(node),
164  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
165  reason);
166  }
167  node->details->unclean = TRUE;
168  // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
169  pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
170 
171  } else if (node->details->unclean) {
172  crm_trace("Cluster node %s %s because %s",
173  pcmk__node_name(node),
174  pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
175  reason);
176 
177  } else {
178  pcmk__sched_warn(scheduler, "Cluster node %s %s: %s",
179  pcmk__node_name(node),
180  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
181  reason);
182  node->details->unclean = TRUE;
183  pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
184  }
185 }
186 
187 // @TODO xpaths can't handle templates, rules, or id-refs
188 
189 // nvpair with provides or requires set to unfencing
190 #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \
191  "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \
192  "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \
193  "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
194 
195 // unfencing in rsc_defaults or any resource
196 #define XPATH_ENABLE_UNFENCING \
197  "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \
198  "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \
199  "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
200  "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
201 
202 static void
203 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
204 {
205  xmlXPathObjectPtr result = NULL;
206 
207  if (!pcmk_is_set(scheduler->flags, flag)) {
208  result = xpath_search(scheduler->input, xpath);
209  if (result && (numXpathResults(result) > 0)) {
211  }
213  }
214 }
215 
216 gboolean
217 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
218 {
219  const char *value = NULL;
220  GHashTable *config_hash = pcmk__strkey_table(free, free);
221 
222  pe_rule_eval_data_t rule_data = {
223  .node_hash = NULL,
224  .now = scheduler->priv->now,
225  .match_data = NULL,
226  .rsc_data = NULL,
227  .op_data = NULL
228  };
229 
230  scheduler->priv->options = config_hash;
231 
234  scheduler);
235 
236  pcmk__validate_cluster_options(config_hash);
237 
241  crm_info("Startup probes: disabled (dangerous)");
242  }
243 
244  value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
245  if (value && crm_is_true(value)) {
246  crm_info("Watchdog-based self-fencing will be performed via SBD if "
247  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
248  " is nonzero");
250  }
251 
252  /* Set certain flags via xpath here, so they can be used before the relevant
253  * configuration sections are unpacked.
254  */
256  scheduler);
257 
258  value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
260 
261  crm_debug("Default fencing action timeout: %s",
263 
267  crm_debug("STONITH of failed nodes is enabled");
268  } else {
269  crm_debug("STONITH of failed nodes is disabled");
270  }
271 
274  crm_trace("STONITH will %s nodes", scheduler->priv->fence_action);
275 
279  crm_debug("Concurrent fencing is enabled");
280  } else {
281  crm_debug("Concurrent fencing is disabled");
282  }
283 
285  if (value) {
288  crm_trace("Priority fencing delay is %s",
290  }
291 
294  crm_debug("Stop all active resources: %s",
295  pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all));
296 
300  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
301  }
302 
303  value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
304 
305  if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
307 
308  } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
310 
311  } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
313 
314  } else if (pcmk__strcase_any_of(value, PCMK_VALUE_FENCE,
315  PCMK_VALUE_FENCE_LEGACY, NULL)) {
317  int do_panic = 0;
318 
320  &do_panic);
321  if (do_panic
324  } else {
326  " to 'stop': cluster has never had quorum");
328  }
329  } else {
331  " to 'stop' because fencing is disabled");
333  }
334 
335  } else {
337  }
338 
339  switch (scheduler->no_quorum_policy) {
341  crm_debug("On loss of quorum: Freeze resources");
342  break;
343  case pcmk_no_quorum_stop:
344  crm_debug("On loss of quorum: Stop ALL resources");
345  break;
347  crm_debug("On loss of quorum: "
348  "Demote promotable resources and stop other resources");
349  break;
351  crm_notice("On loss of quorum: Fence all remaining nodes");
352  break;
354  crm_notice("On loss of quorum: Ignore");
355  break;
356  }
357 
361  crm_trace("Orphan resources are stopped");
362  } else {
363  crm_trace("Orphan resources are ignored");
364  }
365 
369  crm_trace("Orphan resource actions are stopped");
370  } else {
371  crm_trace("Orphan resource actions are ignored");
372  }
373 
376  crm_trace("Maintenance mode: %s",
377  pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance));
378 
382  crm_trace("Start failures are always fatal");
383  } else {
384  crm_trace("Start failures are handled by failcount");
385  }
386 
390  }
392  crm_trace("Unseen nodes will be fenced");
393  } else {
395  "Blind faith: not fencing unseen nodes");
396  }
397 
399 
402  crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy);
403 
409  crm_trace("Resources will be locked to nodes that were cleanly "
410  "shut down (locks expire after %s)",
412  } else {
413  crm_trace("Resources will not be locked to nodes that were cleanly "
414  "shut down");
415  }
416 
419  if (scheduler->priv->node_pending_ms == 0U) {
420  crm_trace("Do not fence pending nodes");
421  } else {
422  crm_trace("Fence pending nodes after %s",
424  }
425 
426  return TRUE;
427 }
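/* For reference, a hedged sketch of the kind of cluster_property_set that
 * unpack_config() consumes (attribute names assumed from the option constants
 * used above; values illustrative):
 *
 *     <crm_config>
 *       <cluster_property_set id="cib-bootstrap-options">
 *         <nvpair id="opt-stonith" name="stonith-enabled" value="true"/>
 *         <nvpair id="opt-quorum" name="no-quorum-policy" value="stop"/>
 *         <nvpair id="opt-maint" name="maintenance-mode" value="false"/>
 *       </cluster_property_set>
 *     </crm_config>
 */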
428 
443 pcmk_node_t *
444 pe_create_node(const char *id, const char *uname, const char *type,
445  int score, pcmk_scheduler_t *scheduler)
446 {
447  enum pcmk__node_variant variant = pcmk__node_variant_cluster;
448  pcmk_node_t *new_node = NULL;
449 
450  if (pcmk_find_node(scheduler, uname) != NULL) {
451  pcmk__config_warn("More than one node entry has name '%s'", uname);
452  }
453 
454  if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
456  variant = pcmk__node_variant_cluster;
457 
458  } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
459  variant = pcmk__node_variant_remote;
460 
461  } else {
462  pcmk__config_err("Ignoring node %s with unrecognized type '%s'",
463  pcmk__s(uname, "without name"), type);
464  return NULL;
465  }
466 
467  new_node = calloc(1, sizeof(pcmk_node_t));
468  if (new_node == NULL) {
469  pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
470  uname);
471  return NULL;
472  }
473 
474  new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment));
475  new_node->details = calloc(1, sizeof(struct pcmk__node_details));
476  new_node->priv = calloc(1, sizeof(pcmk__node_private_t));
477  if ((new_node->assign == NULL) || (new_node->details == NULL)
478  || (new_node->priv == NULL)) {
479  free(new_node->assign);
480  free(new_node->details);
481  free(new_node->priv);
482  free(new_node);
483  pcmk__sched_err(scheduler, "Could not allocate memory for node %s",
484  uname);
485  return NULL;
486  }
487 
488  crm_trace("Creating node for entry %s/%s", uname, id);
489  new_node->assign->score = score;
490  new_node->priv->id = id;
491  new_node->priv->name = uname;
492  new_node->priv->flags = pcmk__node_probes_allowed;
493  new_node->details->online = FALSE;
494  new_node->details->shutdown = FALSE;
495  new_node->details->running_rsc = NULL;
496  new_node->priv->scheduler = scheduler;
497  new_node->priv->variant = variant;
498  new_node->priv->attrs = pcmk__strkey_table(free, free);
499  new_node->priv->utilization = pcmk__strkey_table(free, free);
501 
502  if (pcmk__is_pacemaker_remote_node(new_node)) {
503  pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote");
505  } else {
506  pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster");
507  }
508 
509  scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
511  return new_node;
512 }
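/* Example call (values are illustrative): creating a plain cluster node entry
 * from a <node id="1" uname="node1" type="member"/> element would look like
 *
 *     pcmk_node_t *n = pe_create_node("1", "node1", PCMK_VALUE_MEMBER, 0,
 *                                     scheduler);
 *
 * Remote and guest nodes are created the same way with PCMK_VALUE_REMOTE, as
 * unpack_remote_nodes() does below.
 */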
513 
514 static const char *
515 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
516 {
517  xmlNode *attr_set = NULL;
518  xmlNode *attr = NULL;
519 
520  const char *container_id = pcmk__xe_id(xml_obj);
521  const char *remote_name = NULL;
522  const char *remote_server = NULL;
523  const char *remote_port = NULL;
524  const char *connect_timeout = "60s";
525  const char *remote_allow_migrate=NULL;
526  const char *is_managed = NULL;
527 
528  for (attr_set = pcmk__xe_first_child(xml_obj, PCMK_XE_META_ATTRIBUTES,
529  NULL, NULL);
530  attr_set != NULL;
531  attr_set = pcmk__xe_next(attr_set, PCMK_XE_META_ATTRIBUTES)) {
532 
533  for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
534  attr != NULL; attr = pcmk__xe_next(attr, NULL)) {
535 
536  const char *value = crm_element_value(attr, PCMK_XA_VALUE);
537  const char *name = crm_element_value(attr, PCMK_XA_NAME);
538 
539  if (name == NULL) { // Sanity
540  continue;
541  }
542 
543  if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) {
544  remote_name = value;
545 
546  } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
547  remote_server = value;
548 
549  } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
550  remote_port = value;
551 
552  } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
553  connect_timeout = value;
554 
555  } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
556  remote_allow_migrate = value;
557 
558  } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
559  is_managed = value;
560  }
561  }
562  }
563 
564  if (remote_name == NULL) {
565  return NULL;
566  }
567 
568  if (pe_find_resource(data->priv->resources, remote_name) != NULL) {
569  return NULL;
570  }
571 
572  pe_create_remote_xml(parent, remote_name, container_id,
573  remote_allow_migrate, is_managed,
574  connect_timeout, remote_server, remote_port);
575  return remote_name;
576 }
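/* Sketch of a guest-node definition this function recognizes (meta-attribute
 * names assumed from the PCMK_META_* constants above; values illustrative):
 *
 *     <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *       <meta_attributes id="vm1-meta">
 *         <nvpair id="vm1-remote-node" name="remote-node" value="guest1"/>
 *         <nvpair id="vm1-remote-addr" name="remote-addr" value="192.168.122.10"/>
 *       </meta_attributes>
 *     </primitive>
 *
 * For such a primitive, an ocf:pacemaker:remote connection resource named
 * "guest1" is injected into the configuration via pe_create_remote_xml().
 */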
577 
578 static void
579 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
580 {
581  if ((new_node->priv->variant == pcmk__node_variant_remote)
582  && (new_node->priv->remote == NULL)) {
583  /* Ignore fencing for remote nodes that don't have a connection resource
584  * associated with them. This happens when remote node entries get left
585  * in the nodes section after the connection resource is removed.
586  */
587  return;
588  }
589 
591  // All nodes are unclean until we've seen their status entry
592  new_node->details->unclean = TRUE;
593 
594  } else {
595  // Blind faith ...
596  new_node->details->unclean = FALSE;
597  }
598 }
599 
600 gboolean
601 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
602 {
603  xmlNode *xml_obj = NULL;
604  pcmk_node_t *new_node = NULL;
605  const char *id = NULL;
606  const char *uname = NULL;
607  const char *type = NULL;
608 
609  for (xml_obj = pcmk__xe_first_child(xml_nodes, PCMK_XE_NODE, NULL, NULL);
610  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, PCMK_XE_NODE)) {
611 
612  int score = 0;
613  int rc = pcmk__xe_get_score(xml_obj, PCMK_XA_SCORE, &score, 0);
614 
615  new_node = NULL;
616 
617  id = crm_element_value(xml_obj, PCMK_XA_ID);
618  uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
619  type = crm_element_value(xml_obj, PCMK_XA_TYPE);
620  crm_trace("Processing node %s/%s", uname, id);
621 
622  if (id == NULL) {
623  pcmk__config_err("Ignoring <" PCMK_XE_NODE
624  "> entry in configuration without id");
625  continue;
626  }
627  if (rc != pcmk_rc_ok) {
628  // Not possible with schema validation enabled
629  pcmk__config_warn("Using 0 as score for node %s "
630  "because '%s' is not a valid score: %s",
631  pcmk__s(uname, "without name"),
633  pcmk_rc_str(rc));
634  }
635  new_node = pe_create_node(id, uname, type, score, scheduler);
636 
637  if (new_node == NULL) {
638  return FALSE;
639  }
640 
641  handle_startup_fencing(scheduler, new_node);
642 
643  add_node_attrs(xml_obj, new_node, FALSE, scheduler);
644 
645  crm_trace("Done with node %s",
646  crm_element_value(xml_obj, PCMK_XA_UNAME));
647  }
648 
649  return TRUE;
650 }
651 
652 static void
653 unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
654 {
655  const char *launcher_id = NULL;
656 
657  if (rsc->priv->children != NULL) {
658  g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher,
659  scheduler);
660  return;
661  }
662 
663  launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER);
664  if ((launcher_id != NULL)
665  && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) {
667  launcher_id);
668 
669  if (launcher != NULL) {
670  rsc->priv->launcher = launcher;
671  launcher->priv->launched =
672  g_list_append(launcher->priv->launched, rsc);
673  pcmk__rsc_trace(rsc, "Resource %s's launcher is %s",
674  rsc->id, launcher_id);
675  } else {
676  pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s",
677  rsc->id, launcher_id);
678  }
679  }
680 }
681 
682 gboolean
683 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
684 {
685  xmlNode *xml_obj = NULL;
686 
687  /* Create remote nodes and guest nodes from the resource configuration
688  * before unpacking resources.
689  */
690  for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
691  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
692 
693  const char *new_node_id = NULL;
694 
695  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
696  * primitives.
697  */
698  if (xml_contains_remote_node(xml_obj)) {
699  new_node_id = pcmk__xe_id(xml_obj);
700  /* The pcmk_find_node() check ensures we don't iterate over an
701  * expanded node that has already been added to the node list
702  */
703  if (new_node_id
704  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
705  crm_trace("Found remote node %s defined by resource %s",
706  new_node_id, pcmk__xe_id(xml_obj));
707  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
708  0, scheduler);
709  }
710  continue;
711  }
712 
713  /* Check for guest nodes, which are defined by special meta-attributes
714  * of a primitive of any type (for example, VirtualDomain or Xen).
715  */
716  if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
717  /* This will add an ocf:pacemaker:remote primitive to the
718  * configuration for the guest node's connection, to be unpacked
719  * later.
720  */
721  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
722  scheduler);
723  if (new_node_id
724  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
725  crm_trace("Found guest node %s in resource %s",
726  new_node_id, pcmk__xe_id(xml_obj));
727  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
728  0, scheduler);
729  }
730  continue;
731  }
732 
733  /* Check for guest nodes inside a group. Clones are currently not
734  * supported as guest nodes.
735  */
736  if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
737  xmlNode *xml_obj2 = NULL;
738  for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
739  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2, NULL)) {
740 
741  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
742  scheduler);
743 
744  if (new_node_id
745  && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
746  crm_trace("Found guest node %s in resource %s inside group %s",
747  new_node_id, pcmk__xe_id(xml_obj2),
748  pcmk__xe_id(xml_obj));
749  pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
750  0, scheduler);
751  }
752  }
753  }
754  }
755  return TRUE;
756 }
757 
758 /* Call this after all the nodes and resources have been
759  * unpacked, but before the status section is read.
760  *
761  * A remote node's online status is reflected by the state
762  * of the remote node's connection resource. We need to link
763  * the remote node to this connection resource so we can have
764  * easy access to the connection resource during the scheduler calculations.
765  */
766 static void
767 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
768 {
769  pcmk_node_t *remote_node = NULL;
770 
772  return;
773  }
774 
776  /* remote_nodes and remote_resources are not linked in quick location calculations */
777  return;
778  }
779 
780  remote_node = pcmk_find_node(scheduler, new_rsc->id);
781  CRM_CHECK(remote_node != NULL, return);
782 
783  pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
784  new_rsc->id, pcmk__node_name(remote_node));
785  remote_node->priv->remote = new_rsc;
786 
787  if (new_rsc->priv->launcher == NULL) {
788  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
789  * the same as is done for cluster nodes.
790  */
791  handle_startup_fencing(scheduler, remote_node);
792 
793  } else {
794  /* pe_create_node() marks the new node as "remote" or "cluster"; now
795  * that we know the node is a guest node, update it correctly.
796  */
797  pcmk__insert_dup(remote_node->priv->attrs,
798  CRM_ATTR_KIND, "container");
799  }
800 }
801 
814 gboolean
815 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
816 {
817  xmlNode *xml_obj = NULL;
818  GList *gIter = NULL;
819 
821 
822  for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
823  xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj, NULL)) {
824 
825  pcmk_resource_t *new_rsc = NULL;
826  const char *id = pcmk__xe_id(xml_obj);
827 
828  if (pcmk__str_empty(id)) {
829  pcmk__config_err("Ignoring <%s> resource without ID",
830  xml_obj->name);
831  continue;
832  }
833 
834  if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) {
835  if (g_hash_table_lookup_extended(scheduler->priv->templates, id,
836  NULL, NULL) == FALSE) {
837  /* Record the template's ID so we at least know that it exists. */
839  }
840  continue;
841  }
842 
843  crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
844  if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
845  scheduler) == pcmk_rc_ok) {
846  scheduler->priv->resources =
847  g_list_append(scheduler->priv->resources, new_rsc);
848  pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
849 
850  } else {
851  pcmk__config_err("Ignoring <%s> resource '%s' "
852  "because configuration is invalid",
853  xml_obj->name, id);
854  }
855  }
856 
857  for (gIter = scheduler->priv->resources;
858  gIter != NULL; gIter = gIter->next) {
859 
860  pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
861 
862  unpack_launcher(rsc, scheduler);
863  link_rsc2remotenode(scheduler, rsc);
864  }
865 
866  scheduler->priv->resources = g_list_sort(scheduler->priv->resources,
869  /* Ignore */
870 
873 
874  pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
875  pcmk__config_err("Either configure some or disable STONITH with the "
876  PCMK_OPT_STONITH_ENABLED " option");
877  pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
878  }
879 
880  return TRUE;
881 }
882 
889 void
891 {
892  if (xml == NULL) {
893  return;
894  }
895 
896  CRM_CHECK(pcmk__xe_is(xml, PCMK_XE_FENCING_TOPOLOGY), return);
897 
898  for (const xmlNode *level = pcmk__xe_first_child(xml, PCMK_XE_FENCING_LEVEL,
899  NULL, NULL);
900  level != NULL; level = pcmk__xe_next(level, PCMK_XE_FENCING_LEVEL)) {
901 
902  const char *id = pcmk__xe_id(level);
903  int index = 0;
904 
905  if (pcmk__str_empty(id)) {
906  pcmk__config_err("Ignoring fencing level without ID");
907  continue;
908  }
909 
910  if (crm_element_value_int(level, PCMK_XA_INDEX, &index) != 0) {
911  pcmk__config_err("Ignoring fencing level %s with invalid index",
912  id);
913  continue;
914  }
915 
916  if ((index < ST__LEVEL_MIN) || (index > ST__LEVEL_MAX)) {
917  pcmk__config_err("Ignoring fencing level %s with out-of-range "
918  "index %d",
919  id, index);
920  }
921  }
922 }
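/* Example of the fencing levels being validated above (a sketch; device lists
 * and targets are illustrative):
 *
 *     <fencing-topology>
 *       <fencing-level id="fl-node1-1" target="node1" index="1" devices="ipmi1"/>
 *       <fencing-level id="fl-node1-2" target="node1" index="2" devices="pdu1"/>
 *     </fencing-topology>
 *
 * Levels without an ID or with an index outside [ST__LEVEL_MIN, ST__LEVEL_MAX]
 * are flagged with a configuration error.
 */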
923 
924 gboolean
925 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
926 {
927  xmlNode *xml_tag = NULL;
928 
930 
931  for (xml_tag = pcmk__xe_first_child(xml_tags, PCMK_XE_TAG, NULL, NULL);
932  xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag, PCMK_XE_TAG)) {
933 
934  xmlNode *xml_obj_ref = NULL;
935  const char *tag_id = pcmk__xe_id(xml_tag);
936 
937  if (tag_id == NULL) {
938  pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
939  (const char *) xml_tag->name);
940  continue;
941  }
942 
943  for (xml_obj_ref = pcmk__xe_first_child(xml_tag, PCMK_XE_OBJ_REF,
944  NULL, NULL);
945  xml_obj_ref != NULL;
946  xml_obj_ref = pcmk__xe_next(xml_obj_ref, PCMK_XE_OBJ_REF)) {
947 
948  const char *obj_ref = pcmk__xe_id(xml_obj_ref);
949 
950  if (obj_ref == NULL) {
951  pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
952  xml_obj_ref->name, tag_id);
953  continue;
954  }
955 
956  pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref);
957  }
958  }
959 
960  return TRUE;
961 }
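/* Sketch of the tags section this parses (IDs illustrative):
 *
 *     <tags>
 *       <tag id="web-servers">
 *         <obj_ref id="web-ip"/>
 *         <obj_ref id="web-site"/>
 *       </tag>
 *     </tags>
 *
 * Each obj_ref ID is recorded against its tag in scheduler->priv->tags via
 * pcmk__add_idref().
 */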
962 
972 static int
973 unpack_ticket_state(xmlNode *xml_ticket, void *userdata)
974 {
975  pcmk_scheduler_t *scheduler = userdata;
976 
977  const char *ticket_id = NULL;
978  const char *granted = NULL;
979  const char *last_granted = NULL;
980  const char *standby = NULL;
981  xmlAttrPtr xIter = NULL;
982 
983  pcmk__ticket_t *ticket = NULL;
984 
985  ticket_id = pcmk__xe_id(xml_ticket);
986  if (pcmk__str_empty(ticket_id)) {
987  return pcmk_rc_ok;
988  }
989 
990  crm_trace("Processing ticket state for %s", ticket_id);
991 
992  ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints,
993  ticket_id);
994  if (ticket == NULL) {
995  ticket = ticket_new(ticket_id, scheduler);
996  if (ticket == NULL) {
997  return pcmk_rc_ok;
998  }
999  }
1000 
1001  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
1002  const char *prop_name = (const char *)xIter->name;
1003  const char *prop_value = pcmk__xml_attr_value(xIter);
1004 
1005  if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
1006  continue;
1007  }
1008  pcmk__insert_dup(ticket->state, prop_name, prop_value);
1009  }
1010 
1011  granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
1012  if (granted && crm_is_true(granted)) {
1014  crm_info("We have ticket '%s'", ticket->id);
1015  } else {
1017  crm_info("We do not have ticket '%s'", ticket->id);
1018  }
1019 
1020  last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
1021  if (last_granted) {
1022  long long last_granted_ll = 0LL;
1023  int rc = pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
1024 
1025  if (rc != pcmk_rc_ok) {
1026  crm_warn("Using %lld instead of invalid " PCMK_XA_LAST_GRANTED
1027  " value '%s' in state for ticket %s: %s",
1028  last_granted_ll, last_granted, ticket->id,
1029  pcmk_rc_str(rc));
1030  }
1031  ticket->last_granted = (time_t) last_granted_ll;
1032  }
1033 
1034  standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
1035  if (standby && crm_is_true(standby)) {
1037  if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) {
1038  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
1039  }
1040  } else {
1042  }
1043 
1044  crm_trace("Done with ticket state for %s", ticket_id);
1045 
1046  return pcmk_rc_ok;
1047 }
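/* Sketch of a ticket state entry as found in the CIB status section
 * (attribute names taken from the constants used above; values illustrative):
 *
 *     <ticket_state id="ticketA" granted="true" last-granted="1721390400"/>
 *
 * Unrecognized attributes are copied into ticket->state verbatim; "granted",
 * "last-granted", and "standby" get the special handling shown above.
 */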
1048 
1049 static void
1050 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1051  pcmk_scheduler_t *scheduler)
1052 {
1053  const char *discovery = NULL;
1054  const xmlNode *attrs = NULL;
1055  pcmk_resource_t *rsc = NULL;
1056  int maint = 0;
1057 
1058  if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1059  return;
1060  }
1061 
1062  if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
1063  return;
1064  }
1065  crm_trace("Processing Pacemaker Remote node %s",
1066  pcmk__node_name(this_node));
1067 
1069  &maint, 0);
1070  if (maint) {
1072  } else {
1074  }
1075 
1076  rsc = this_node->priv->remote;
1077  if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) {
1078  this_node->details->unclean = FALSE;
1080  }
1082  NULL);
1083  add_node_attrs(attrs, this_node, TRUE, scheduler);
1084 
1085  if (pe__shutdown_requested(this_node)) {
1086  crm_info("%s is shutting down", pcmk__node_name(this_node));
1087  this_node->details->shutdown = TRUE;
1088  }
1089 
1090  if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
1092  crm_info("%s is in standby mode", pcmk__node_name(this_node));
1094  }
1095 
1098  || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) {
1099  crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
1100  this_node->details->maintenance = TRUE;
1101  }
1102 
1103  discovery = pcmk__node_attr(this_node,
1105  NULL, pcmk__rsc_node_current);
1106  if ((discovery != NULL) && !crm_is_true(discovery)) {
1108  "Support for the "
1110  " node attribute is deprecated and will be removed"
1111  " (and behave as 'true') in a future release.");
1112 
1113  if (pcmk__is_remote_node(this_node)
1115  pcmk__config_warn("Ignoring "
1117  " attribute on Pacemaker Remote node %s"
1118  " because fencing is disabled",
1119  pcmk__node_name(this_node));
1120  } else {
1121  /* This is either a remote node with fencing enabled, or a guest
1122  * node. We don't care whether fencing is enabled when fencing guest
1123  * nodes, because they are "fenced" by recovering their containing
1124  * resource.
1125  */
1126  crm_info("%s has resource discovery disabled",
1127  pcmk__node_name(this_node));
1129  }
1130  }
1131 }
1132 
1141 static void
1142 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1143  pcmk_scheduler_t *scheduler)
1144 {
1145  const char *discovery = NULL;
1146  const xmlNode *attrs = pcmk__xe_first_child(state,
1148  NULL, NULL);
1149 
1150  add_node_attrs(attrs, node, TRUE, scheduler);
1151 
1154  crm_info("%s is in standby mode", pcmk__node_name(node));
1156  }
1157 
1160  crm_info("%s is in maintenance mode", pcmk__node_name(node));
1161  node->details->maintenance = TRUE;
1162  }
1163 
1164  discovery = pcmk__node_attr(node,
1166  NULL, pcmk__rsc_node_current);
1167  if ((discovery != NULL) && !crm_is_true(discovery)) {
1168  pcmk__config_warn("Ignoring "
1170  " attribute for %s because disabling resource"
1171  " discovery is not allowed for cluster nodes",
1172  pcmk__node_name(node));
1173  }
1174 }
1175 
1188 static void
1189 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1190 {
1191  const char *id = NULL;
1192  const char *uname = NULL;
1193  pcmk_node_t *this_node = NULL;
1194 
1195  id = crm_element_value(state, PCMK_XA_ID);
1196  if (id == NULL) {
1197  pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
1198  PCMK_XA_ID);
1199  crm_log_xml_info(state, "missing-id");
1200  return;
1201  }
1202 
1203  uname = crm_element_value(state, PCMK_XA_UNAME);
1204  if (uname == NULL) {
1205  /* If a joining peer makes the cluster acquire the quorum from Corosync
1206  * but has not joined the controller CPG membership yet, it's possible
1207  * that the created PCMK__XE_NODE_STATE entry doesn't have a
1208  * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to
1209  * join CPG.
1210  */
1211  crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
1212  "without " PCMK_XA_UNAME,
1213  id);
1214  }
1215 
1216  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1217  if (this_node == NULL) {
1218  crm_notice("Ignoring recorded state for removed node with name %s and "
1219  PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
1220  return;
1221  }
1222 
1223  if (pcmk__is_pacemaker_remote_node(this_node)) {
1224  int remote_fenced = 0;
1225 
1226  /* We can't determine the online status of Pacemaker Remote nodes until
1227  * after all resource history has been unpacked. In this first pass, we
1228  * do need to mark whether the node has been fenced, as this plays a
1229  * role during unpacking cluster node resource state.
1230  */
1232  &remote_fenced, 0);
1233  if (remote_fenced) {
1235  } else {
1237  }
1238  return;
1239  }
1240 
1241  unpack_transient_attributes(state, this_node, scheduler);
1242 
1243  /* Provisionally mark this cluster node as clean. We have at least seen it
1244  * in the current cluster's lifetime.
1245  */
1246  this_node->details->unclean = FALSE;
1248 
1249  crm_trace("Determining online status of cluster node %s (id %s)",
1250  pcmk__node_name(this_node), id);
1251  determine_online_status(state, this_node, scheduler);
1252 
1254  && this_node->details->online
1256  /* Everything else should flow from this automatically
1257  * (at least until the scheduler becomes able to migrate off
1258  * healthy resources)
1259  */
1260  pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1261  FALSE);
1262  }
1263 }
1264 
1282 static int
1283 unpack_node_history(const xmlNode *status, bool fence,
1284  pcmk_scheduler_t *scheduler)
1285 {
1286  int rc = pcmk_rc_ok;
1287 
1288  // Loop through all PCMK__XE_NODE_STATE entries in CIB status
1289  for (const xmlNode *state = pcmk__xe_first_child(status,
1290  PCMK__XE_NODE_STATE, NULL,
1291  NULL);
1292  state != NULL; state = pcmk__xe_next(state, PCMK__XE_NODE_STATE)) {
1293 
1294  const char *id = pcmk__xe_id(state);
1295  const char *uname = crm_element_value(state, PCMK_XA_UNAME);
1296  pcmk_node_t *this_node = NULL;
1297 
1298  if ((id == NULL) || (uname == NULL)) {
1299  // Warning already logged in first pass through status section
1300  crm_trace("Not unpacking resource history from malformed "
1301  PCMK__XE_NODE_STATE " without id and/or uname");
1302  continue;
1303  }
1304 
1305  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1306  if (this_node == NULL) {
1307  // Warning already logged in first pass through status section
1308  crm_trace("Not unpacking resource history for node %s because "
1309  "no longer in configuration", id);
1310  continue;
1311  }
1312 
1313  if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1314  crm_trace("Not unpacking resource history for node %s because "
1315  "already unpacked", id);
1316  continue;
1317  }
1318 
1319  if (fence) {
1320  // We're processing all remaining nodes
1321 
1322  } else if (pcmk__is_guest_or_bundle_node(this_node)) {
1323  /* We can unpack a guest node's history only after we've unpacked
1324  * other resource history to the point that we know that the node's
1325  * connection and containing resource are both up.
1326  */
1327  const pcmk_resource_t *remote = this_node->priv->remote;
1328  const pcmk_resource_t *launcher = remote->priv->launcher;
1329 
1330  if ((remote->priv->orig_role != pcmk_role_started)
1331  || (launcher->priv->orig_role != pcmk_role_started)) {
1332  crm_trace("Not unpacking resource history for guest node %s "
1333  "because launcher and connection are not known to "
1334  "be up", id);
1335  continue;
1336  }
1337 
1338  } else if (pcmk__is_remote_node(this_node)) {
1339  /* We can unpack a remote node's history only after we've unpacked
1340  * other resource history to the point that we know that the node's
1341  * connection is up, with the exception of when shutdown locks are
1342  * in use.
1343  */
1344  pcmk_resource_t *rsc = this_node->priv->remote;
1345 
1346  if ((rsc == NULL)
1347  || (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)
1348  && (rsc->priv->orig_role != pcmk_role_started))) {
1349  crm_trace("Not unpacking resource history for remote node %s "
1350  "because connection is not known to be up", id);
1351  continue;
1352  }
1353 
1354  /* If fencing and shutdown locks are disabled and we're not processing
1355  * unseen nodes, then we don't want to unpack offline nodes until online
1356  * nodes have been unpacked. This allows us to number active clone
1357  * instances first.
1358  */
1359  } else if (!pcmk_any_flags_set(scheduler->flags,
1362  && !this_node->details->online) {
1363  crm_trace("Not unpacking resource history for offline "
1364  "cluster node %s", id);
1365  continue;
1366  }
1367 
1368  if (pcmk__is_pacemaker_remote_node(this_node)) {
1369  determine_remote_online_status(scheduler, this_node);
1370  unpack_handle_remote_attrs(this_node, state, scheduler);
1371  }
1372 
1373  crm_trace("Unpacking resource history for %snode %s",
1374  (fence? "unseen " : ""), id);
1375 
1377  unpack_node_lrm(this_node, state, scheduler);
1378 
1379  rc = EAGAIN; // Other node histories might depend on this one
1380  }
1381  return rc;
1382 }
1383 
1384 /* remove nodes that are down, stopping */
1385 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1386 /* anything else? */
1387 gboolean
1388 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1389 {
1390  xmlNode *state = NULL;
1391 
1392  crm_trace("Beginning unpack");
1393 
1394  if (scheduler->priv->ticket_constraints == NULL) {
1397  }
1398 
1399  for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
1400  state = pcmk__xe_next(state, NULL)) {
1401 
1402  if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
1404  unpack_ticket_state, scheduler);
1405 
1406  } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
1407  unpack_node_state(state, scheduler);
1408  }
1409  }
1410 
1411  while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1412  crm_trace("Another pass through node resource histories is needed");
1413  }
1414 
1415  // Now catch any nodes we didn't see
1416  unpack_node_history(status,
1419  scheduler);
1420 
1421  /* Now that we know where resources are, we can schedule stops of containers
1422  * with failed bundle connections
1423  */
1424  if (scheduler->priv->stop_needed != NULL) {
1425  for (GList *item = scheduler->priv->stop_needed;
1426  item != NULL; item = item->next) {
1427 
1428  pcmk_resource_t *container = item->data;
1429  pcmk_node_t *node = pcmk__current_node(container);
1430 
1431  if (node) {
1432  stop_action(container, node, FALSE);
1433  }
1434  }
1435  g_list_free(scheduler->priv->stop_needed);
1436  scheduler->priv->stop_needed = NULL;
1437  }
1438 
1439  /* Now that we know status of all Pacemaker Remote connections and nodes,
1440  * we can stop connections for node shutdowns, and check the online status
1441  * of remote/guest nodes that didn't have any node history to unpack.
1442  */
1443  for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1444  pcmk_node_t *this_node = gIter->data;
1445 
1446  if (!pcmk__is_pacemaker_remote_node(this_node)) {
1447  continue;
1448  }
1449  if (this_node->details->shutdown
1450  && (this_node->priv->remote != NULL)) {
1452  "remote shutdown");
1453  }
1454  if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
1455  determine_remote_online_status(scheduler, this_node);
1456  }
1457  }
1458 
1459  return TRUE;
1460 }
1461 
1473 static long long
1474 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1475 {
1476  const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1477  int member = 0;
1478 
1479  if (member_time == NULL) {
1480  return -1LL;
1481 
1482  } else if (crm_str_to_boolean(member_time, &member) == 1) {
1483  /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1484  * recorded as a boolean for a DC < 2.1.7, or the node is pending
1485  * shutdown and has left the CPG, in which case it was set to 1 to avoid
1486  * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
1487  *
1488  * We return the effective time for in_ccm=1 because what's important to
1489  * avoid fencing is that effective time minus this value is less than
1490  * the pending node timeout.
1491  */
1492  return member? (long long) get_effective_time(scheduler) : 0LL;
1493 
1494  } else {
1495  long long when_member = 0LL;
1496 
1497  if ((pcmk__scan_ll(member_time, &when_member,
1498  0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1499  crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1500  " in " PCMK__XE_NODE_STATE " entry", member_time);
1501  return -1LL;
1502  }
1503  return when_member;
1504  }
1505 }
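/* Worked example (illustrative values): with a post-2.1.7 DC, a node_state
 * entry might carry in_ccm="1721390400" (seconds since the epoch when the node
 * became a member), so unpack_node_member() returns 1721390400. A legacy
 * boolean in_ccm="true" instead returns the current effective time, and
 * in_ccm="0" or "false" returns 0 (not a member). A missing attribute
 * returns -1.
 */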
1506 
1516 static long long
1517 unpack_node_online(const xmlNode *node_state)
1518 {
1519  const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
1520 
1521  // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1522  if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
1524  return 0LL;
1525 
1526  } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
1527  return 1LL;
1528 
1529  } else {
1530  long long when_online = 0LL;
1531 
1532  if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1533  || (when_online < 0)) {
1534  crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
1535  PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
1536  return 0LL;
1537  }
1538  return when_online;
1539  }
1540 }
1541 
1551 static bool
1552 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1553 {
1554  long long value = 0LL;
1555  int value_i = 0;
1556  int rc = pcmk_rc_ok;
1557  const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
1558  NULL, pcmk__rsc_node_current);
1559 
1560  // Value may be boolean or an epoch time
1561  if (crm_str_to_boolean(value_s, &value_i) == 1) {
1562  return (value_i != 0);
1563  }
1564  rc = pcmk__scan_ll(value_s, &value, 0LL);
1565  if (rc == pcmk_rc_ok) {
1566  return (value > 0);
1567  }
1568  crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1569  "node attribute for %s: %s",
1570  value_s, pcmk__node_name(node), pcmk_rc_str(rc));
1571  return false;
1572 }
1573 
1574 static gboolean
1575 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1576  const xmlNode *node_state,
1577  pcmk_node_t *this_node)
1578 {
1579  gboolean online = FALSE;
1580  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1581  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1582  long long when_member = unpack_node_member(node_state, scheduler);
1583  long long when_online = unpack_node_online(node_state);
1584 
1585  if (when_member <= 0) {
1586  crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
1587  ((when_member < 0)? "presumed " : ""));
1588 
1589  } else if (when_online > 0) {
1590  if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1591  online = TRUE;
1592  } else {
1593  crm_debug("Node %s is not ready to run resources: %s",
1594  pcmk__node_name(this_node), join);
1595  }
1596 
1597  } else if (!pcmk_is_set(this_node->priv->flags,
1599  crm_trace("Node %s controller is down: "
1600  "member@%lld online@%lld join=%s expected=%s",
1601  pcmk__node_name(this_node), when_member, when_online,
1602  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1603 
1604  } else {
1605  /* mark it unclean */
1606  pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1607  crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1608  pcmk__node_name(this_node), when_member, when_online,
1609  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1610  }
1611  return online;
1612 }
1613 
1627 static inline bool
1628 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1629  long long when_member, long long when_online)
1630 {
1631  if ((scheduler->priv->node_pending_ms > 0U)
1632  && (when_member > 0) && (when_online <= 0)) {
1633  // There is a timeout on pending nodes, and node is pending
1634 
1635  time_t timeout = when_member
1637 
1638  if (get_effective_time(node->priv->scheduler) >= timeout) {
1639  return true; // Node has timed out
1640  }
1641 
1642  // Node is pending, but still has time
1643  pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1644  }
1645  return false;
1646 }
1647 
1648 static bool
1649 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1650  const xmlNode *node_state,
1651  pcmk_node_t *this_node)
1652 {
1653  bool termination_requested = unpack_node_terminate(this_node, node_state);
1654  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1655  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1656  long long when_member = unpack_node_member(node_state, scheduler);
1657  long long when_online = unpack_node_online(node_state);
1658 
1659 /*
1660  - PCMK__XA_JOIN ::= member|down|pending|banned
1661  - PCMK_XA_EXPECTED ::= member|down
1662 
1663  @COMPAT with entries recorded for DCs < 2.1.7
1664  - PCMK__XA_IN_CCM ::= true|false
1665  - PCMK_XA_CRMD ::= online|offline
1666 
1667  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1668  - PCMK__XA_IN_CCM ::= <timestamp>|0
1669  Time since when the node has been a cluster member. A value of 0 means the
1670  node is not a cluster member.
1671 
1672  - PCMK_XA_CRMD ::= <timestamp>|0
1673  Time since when the peer has been online in CPG. A value of 0 means the peer
1674  is offline in CPG.
1675 */
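/* For example (values illustrative), a healthy member recorded by a 2.1.7+ DC
 * might look like
 *
 *     <node_state id="1" uname="node1" in_ccm="1721390400" crmd="1721390405"
 *                 join="member" expected="member"/>
 *
 * while a node that has left the cluster would have in_ccm="0" crmd="0"
 * join="down" expected="down".
 */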
1676 
1677  crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1678  pcmk__node_name(this_node), when_member, when_online,
1679  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1680  (termination_requested? " (termination requested)" : ""));
1681 
1682  if (this_node->details->shutdown) {
1683  crm_debug("%s is shutting down", pcmk__node_name(this_node));
1684 
1685  /* Slightly different criteria since we can't shut down a dead peer */
1686  return (when_online > 0);
1687  }
1688 
1689  if (when_member < 0) {
1690  pe_fence_node(scheduler, this_node,
1691  "peer has not been seen by the cluster", FALSE);
1692  return false;
1693  }
1694 
1695  if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1696  pe_fence_node(scheduler, this_node,
1697  "peer failed Pacemaker membership criteria", FALSE);
1698 
1699  } else if (termination_requested) {
1700  if ((when_member <= 0) && (when_online <= 0)
1701  && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1702  crm_info("%s was fenced as requested", pcmk__node_name(this_node));
1703  return false;
1704  }
1705  pe_fence_node(scheduler, this_node, "fencing was requested", false);
1706 
1707  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1709 
1710  if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1711  pe_fence_node(scheduler, this_node,
1712  "peer pending timed out on joining the process group",
1713  FALSE);
1714 
1715  } else if ((when_member > 0) || (when_online > 0)) {
1716  crm_info("- %s is not ready to run resources",
1717  pcmk__node_name(this_node));
1719  this_node->details->pending = TRUE;
1720 
1721  } else {
1722  crm_trace("%s is down or still coming up",
1723  pcmk__node_name(this_node));
1724  }
1725 
1726  } else if (when_member <= 0) {
1727  // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
1728  pe_fence_node(scheduler, this_node,
1729  "peer is no longer part of the cluster", TRUE);
1730 
1731  } else if (when_online <= 0) {
1732  pe_fence_node(scheduler, this_node,
1733  "peer process is no longer available", FALSE);
1734 
1735  /* Everything is running at this point, now check join state */
1736 
1737  } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1738  crm_info("%s is active", pcmk__node_name(this_node));
1739 
1740  } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1741  CRMD_JOINSTATE_DOWN, NULL)) {
1742  crm_info("%s is not ready to run resources",
1743  pcmk__node_name(this_node));
1745  this_node->details->pending = TRUE;
1746 
1747  } else {
1748  pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1749  FALSE);
1750  }
1751 
1752  return (when_member > 0);
1753 }
1754 
1755 static void
1756 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1757  pcmk_node_t *this_node)
1758 {
1759  pcmk_resource_t *rsc = this_node->priv->remote;
1760  pcmk_resource_t *launcher = NULL;
1761  pcmk_node_t *host = NULL;
1762  const char *node_type = "Remote";
1763 
1764  if (rsc == NULL) {
1765  /* This is a leftover node state entry for a former Pacemaker Remote
1766  * node whose connection resource was removed. Consider it offline.
1767  */
1768  crm_trace("Pacemaker Remote node %s is considered OFFLINE because "
1769  "its connection resource has been removed from the CIB",
1770  this_node->priv->id);
1771  this_node->details->online = FALSE;
1772  return;
1773  }
1774 
1775  launcher = rsc->priv->launcher;
1776  if (launcher != NULL) {
1777  node_type = "Guest";
1778  if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1779  host = rsc->priv->active_nodes->data;
1780  }
1781  }
1782 
1783  /* If the resource is currently started, mark it online. */
1784  if (rsc->priv->orig_role == pcmk_role_started) {
1785  this_node->details->online = TRUE;
1786  }
1787 
1788  /* consider this node shutting down if transitioning start->stop */
1789  if ((rsc->priv->orig_role == pcmk_role_started)
1790  && (rsc->priv->next_role == pcmk_role_stopped)) {
1791 
1792  crm_trace("%s node %s shutting down because connection resource is stopping",
1793  node_type, this_node->priv->id);
1794  this_node->details->shutdown = TRUE;
1795  }
1796 
1797  /* Now check all the failure conditions. */
1798  if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) {
1799  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1800  this_node->priv->id);
1801  this_node->details->online = FALSE;
1803 
1804  } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
1805  crm_trace("%s node %s OFFLINE because connection resource failed",
1806  node_type, this_node->priv->id);
1807  this_node->details->online = FALSE;
1808 
1809  } else if ((rsc->priv->orig_role == pcmk_role_stopped)
1810  || ((launcher != NULL)
1811  && (launcher->priv->orig_role == pcmk_role_stopped))) {
1812 
1813  crm_trace("%s node %s OFFLINE because its resource is stopped",
1814  node_type, this_node->priv->id);
1815  this_node->details->online = FALSE;
1817 
1818  } else if (host && (host->details->online == FALSE)
1819  && host->details->unclean) {
1820  crm_trace("Guest node %s UNCLEAN because host is unclean",
1821  this_node->priv->id);
1822  this_node->details->online = FALSE;
1824 
1825  } else {
1826  crm_trace("%s node %s is %s",
1827  node_type, this_node->priv->id,
1828  this_node->details->online? "ONLINE" : "OFFLINE");
1829  }
1830 }
1831 
1832 static void
1833 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1835 {
1836  gboolean online = FALSE;
1837  const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
1838 
1839  CRM_CHECK(this_node != NULL, return);
1840 
1841  this_node->details->shutdown = FALSE;
1842 
1843  if (pe__shutdown_requested(this_node)) {
1844  this_node->details->shutdown = TRUE;
1845 
1846  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1848  }
1849 
1851  online = determine_online_status_no_fencing(scheduler, node_state,
1852  this_node);
1853 
1854  } else {
1855  online = determine_online_status_fencing(scheduler, node_state,
1856  this_node);
1857  }
1858 
1859  if (online) {
1860  this_node->details->online = TRUE;
1861 
1862  } else {
1863  /* remove node from contention */
1864  this_node->assign->score = -PCMK_SCORE_INFINITY;
1865  }
1866 
1867  if (online && this_node->details->shutdown) {
1868  /* don't run resources here */
1869  this_node->assign->score = -PCMK_SCORE_INFINITY;
1870  }
1871 
1872  if (this_node->details->unclean) {
1873  pcmk__sched_warn(scheduler, "%s is unclean",
1874  pcmk__node_name(this_node));
1875 
1876  } else if (!this_node->details->online) {
1877  crm_trace("%s is offline", pcmk__node_name(this_node));
1878 
1879  } else if (this_node->details->shutdown) {
1880  crm_info("%s is shutting down", pcmk__node_name(this_node));
1881 
1882  } else if (this_node->details->pending) {
1883  crm_info("%s is pending", pcmk__node_name(this_node));
1884 
1885  } else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) {
1886  crm_info("%s is in standby", pcmk__node_name(this_node));
1887 
1888  } else if (this_node->details->maintenance) {
1889  crm_info("%s is in maintenance", pcmk__node_name(this_node));
1890 
1891  } else {
1892  crm_info("%s is online", pcmk__node_name(this_node));
1893  }
1894 }
1895 
1904 const char *
1905 pe_base_name_end(const char *id)
1906 {
1907  if (!pcmk__str_empty(id)) {
1908  const char *end = id + strlen(id) - 1;
1909 
1910  for (const char *s = end; s > id; --s) {
1911  switch (*s) {
1912  case '0':
1913  case '1':
1914  case '2':
1915  case '3':
1916  case '4':
1917  case '5':
1918  case '6':
1919  case '7':
1920  case '8':
1921  case '9':
1922  break;
1923  case ':':
1924  return (s == end)? s : (s - 1);
1925  default:
1926  return end;
1927  }
1928  }
1929  return end;
1930  }
1931  return NULL;
1932 }
1933 
1944 char *
1945 clone_strip(const char *last_rsc_id)
1946 {
1947  const char *end = pe_base_name_end(last_rsc_id);
1948  char *basename = NULL;
1949 
1950  pcmk__assert(end != NULL);
1951  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1952  pcmk__assert(basename != NULL);
1953  return basename;
1954 }
1955 
1966 char *
1967 clone_zero(const char *last_rsc_id)
1968 {
1969  const char *end = pe_base_name_end(last_rsc_id);
1970  size_t base_name_len = end - last_rsc_id + 1;
1971  char *zero = NULL;
1972 
1973  pcmk__assert(end != NULL);
1974  zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
1975  memcpy(zero, last_rsc_id, base_name_len);
1976  zero[base_name_len] = ':';
1977  zero[base_name_len + 1] = '0';
1978  return zero;
1979 }
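/* Behavior of the three helpers above, per the code (examples only):
 *
 *     pe_base_name_end("myclone:32")  -> pointer to the final 'e' of "myclone"
 *     clone_strip("myclone:32")       -> "myclone"   (newly allocated)
 *     clone_zero("myclone:32")        -> "myclone:0" (newly allocated)
 *
 * An ID without a numeric ":N" suffix is returned unchanged by clone_strip(),
 * and clone_zero() appends ":0" to it.
 */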
1980 
1981 static pcmk_resource_t *
1982 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1984 {
1985  pcmk_resource_t *rsc = NULL;
1986  xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
1987 
1988  pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
1989  crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
1990  crm_log_xml_debug(xml_rsc, "Orphan resource");
1991 
1992  if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
1993  return NULL;
1994  }
1995 
1996  if (xml_contains_remote_node(xml_rsc)) {
1997  pcmk_node_t *node;
1998 
1999  crm_debug("Detected orphaned remote node %s", rsc_id);
2000  node = pcmk_find_node(scheduler, rsc_id);
2001  if (node == NULL) {
2002  node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, 0,
2003  scheduler);
2004  }
2005  link_rsc2remotenode(scheduler, rsc);
2006 
2007  if (node) {
2008  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
2009  node->details->shutdown = TRUE;
2010  }
2011  }
2012 
2013  if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
2014  // This removed resource needs to be mapped to a launcher
2015  crm_trace("Launched resource %s was removed from the configuration",
2016  rsc_id);
2018  }
2020  scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc);
2021  return rsc;
2022 }
2023 
2035 static pcmk_resource_t *
2036 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
2037  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2038 {
2040  pcmk_resource_t *orphan = NULL;
2041 
2042  // find_rsc() because we might be a cloned group
2043  orphan = top->priv->fns->find_rsc(top, rsc_id, NULL,
2045 
2046  pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
2047  top->id, parent->id, rsc_id, pcmk__node_name(node));
2048  return orphan;
2049 }
2050 
2066 static pcmk_resource_t *
2067 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2068  pcmk_resource_t *parent, const char *rsc_id)
2069 {
2070  GList *rIter = NULL;
2071  pcmk_resource_t *rsc = NULL;
2072  pcmk_resource_t *inactive_instance = NULL;
2073  gboolean skip_inactive = FALSE;
2074 
2075  pcmk__assert(pcmk__is_anonymous_clone(parent));
2076 
2077  // Check for active (or partially active, for cloned groups) instance
2078  pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
2079  rsc_id, pcmk__node_name(node), parent->id);
2080 
2081  for (rIter = parent->priv->children;
2082  (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {
2083 
2084  GList *locations = NULL;
2085  pcmk_resource_t *child = rIter->data;
2086 
2087  /* Check whether this instance is already known to be active or pending
2088  * anywhere, at this stage of unpacking. Because this function is called
2089  * for a resource before the resource's individual operation history
2090  * entries are unpacked, locations will generally not contain the
2091  * desired node.
2092  *
2093  * However, there are three exceptions:
2094  * (1) when child is a cloned group and we have already unpacked the
2095  * history of another member of the group on the same node;
2096  * (2) when we've already unpacked the history of another numbered
2097  * instance on the same node (which can happen if
2098  * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
2099  * (3) when we re-run calculations on the same scheduler data as part of
2100  * a simulation.
2101  */
2102  child->priv->fns->location(child, &locations, pcmk__rsc_node_current
2104  if (locations) {
2105  /* We should never associate the same numbered anonymous clone
2106  * instance with multiple nodes, and clone instances can't migrate,
2107  * so there must be only one location, regardless of history.
2108  */
2109  CRM_LOG_ASSERT(locations->next == NULL);
2110 
2111  if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
2112  /* This child instance is active on the requested node, so check
2113  * for a corresponding configured resource. We use find_rsc()
2114  * instead of child because child may be a cloned group, and we
2115  * need the particular member corresponding to rsc_id.
2116  *
2117  * If the history entry is orphaned, rsc will be NULL.
2118  */
2119  rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
2121  if (rsc) {
2122  /* If there are multiple instance history entries for an
2123  * anonymous clone in a single node's history (which can
2124  * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
2125  * to false), we want to consider the instances beyond the
2126  * first as orphans, even if there are inactive instance
2127  * numbers available.
2128  */
2129  if (rsc->priv->active_nodes != NULL) {
2130  crm_notice("Active (now-)anonymous clone %s has "
2131  "multiple (orphan) instance histories on %s",
2132  parent->id, pcmk__node_name(node));
2133  skip_inactive = TRUE;
2134  rsc = NULL;
2135  } else {
2136  pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
2137  }
2138  }
2139  }
2140  g_list_free(locations);
2141 
2142  } else {
2143  pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
2144  if (!skip_inactive && !inactive_instance
2145  && !pcmk_is_set(child->flags, pcmk__rsc_blocked)) {
2146  // Remember one inactive instance in case we don't find active
2147  inactive_instance =
2148  parent->priv->fns->find_rsc(child, rsc_id, NULL,
2150 
2151  /* ... but don't use it if it was already associated with a
2152  * pending action on another node
2153  */
2154  if (inactive_instance != NULL) {
2155  const pcmk_node_t *pending_node = NULL;
2156 
2157  pending_node = inactive_instance->priv->pending_node;
2158  if ((pending_node != NULL)
2159  && !pcmk__same_node(pending_node, node)) {
2160  inactive_instance = NULL;
2161  }
2162  }
2163  }
2164  }
2165  }
2166 
2167  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2168  pcmk__rsc_trace(parent, "Resource %s, empty slot",
2169  inactive_instance->id);
2170  rsc = inactive_instance;
2171  }
2172 
2173  /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
2174  * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
2175  * don't want to consume a valid instance number for unclean nodes. Such
2176  * instances may appear to be active according to the history, but should be
2177  * considered inactive, so we can start an instance elsewhere. Treat such
2178  * instances as orphans.
2179  *
2180  * An exception is instances running on guest nodes -- since guest node
 2181  * "fencing" is actually just a resource stop, the "requires" setting shouldn't apply.
2182  *
2183  * @TODO Ideally, we'd use an inactive instance number if it is not needed
2184  * for any clean instances. However, we don't know that at this point.
2185  */
2186  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)
2187  && (!node->details->online || node->details->unclean)
2188  && !pcmk__is_guest_or_bundle_node(node)
2190 
2191  rsc = NULL;
2192  }
2193 
2194  if (rsc == NULL) {
2195  rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2196  pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
2197  }
2198  return rsc;
2199 }
2200 
2201 static pcmk_resource_t *
2202 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2203  const char *rsc_id)
2204 {
2205  pcmk_resource_t *rsc = NULL;
2206  pcmk_resource_t *parent = NULL;
2207 
2208  crm_trace("looking for %s", rsc_id);
2209  rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2210 
2211  if (rsc == NULL) {
2212  /* If we didn't find the resource by its name in the operation history,
2213  * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
2214  * we create a single :0 orphan to match against here.
2215  */
2216  char *clone0_id = clone_zero(rsc_id);
 2217  pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
 2218  clone0_id);
2219 
2220  if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) {
2221  rsc = clone0;
2222  parent = uber_parent(clone0);
2223  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2224  } else {
2225  crm_trace("%s is not known as %s either (orphan)",
2226  rsc_id, clone0_id);
2227  }
2228  free(clone0_id);
2229 
2230  } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
2231  crm_trace("Resource history for %s is orphaned "
2232  "because it is no longer primitive", rsc_id);
2233  return NULL;
2234 
2235  } else {
2236  parent = uber_parent(rsc);
2237  }
2238 
2239  if (pcmk__is_anonymous_clone(parent)) {
2240 
2241  if (pcmk__is_bundled(parent)) {
2242  rsc = pe__find_bundle_replica(parent->priv->parent, node);
2243  } else {
2244  char *base = clone_strip(rsc_id);
2245 
2246  rsc = find_anonymous_clone(scheduler, node, parent, base);
2247  free(base);
2248  pcmk__assert(rsc != NULL);
2249  }
2250  }
2251 
2252  if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
2253  && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {
2254 
2255  pcmk__str_update(&(rsc->priv->history_id), rsc_id);
2256  pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2257  rsc_id, pcmk__node_name(node), rsc->id,
2258  pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : "");
2259  }
2260  return rsc;
2261 }
2262 
2263 static pcmk_resource_t *
2264 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
 2265  pcmk_scheduler_t *scheduler)
 2266 {
2267  pcmk_resource_t *rsc = NULL;
2268  const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2269 
2270  crm_debug("Detected orphan resource %s on %s",
2271  rsc_id, pcmk__node_name(node));
2272  rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2273  if (rsc == NULL) {
2274  return NULL;
2275  }
2276 
2279 
2280  } else {
2281  CRM_CHECK(rsc != NULL, return NULL);
2282  pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
2284  "__orphan_do_not_run__", scheduler);
2285  }
2286  return rsc;
2287 }
2288 
2289 static void
2290 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2291  enum pcmk__on_fail on_fail)
2292 {
2293  pcmk_node_t *tmpnode = NULL;
2294  char *reason = NULL;
2295  enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
2296  pcmk_scheduler_t *scheduler = NULL;
2297  bool known_active = false;
2298 
2299  pcmk__assert(rsc != NULL);
2300  scheduler = rsc->priv->scheduler;
2301  known_active = (rsc->priv->orig_role > pcmk_role_stopped);
2302  pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2303  rsc->id, pcmk_role_text(rsc->priv->orig_role),
2304  pcmk__node_name(node), pcmk__on_fail_text(on_fail));
2305 
2306  /* process current state */
2307  if (rsc->priv->orig_role != pcmk_role_unknown) {
2308  pcmk_resource_t *iter = rsc;
2309 
2310  while (iter) {
2311  if (g_hash_table_lookup(iter->priv->probed_nodes,
2312  node->priv->id) == NULL) {
2313  pcmk_node_t *n = pe__copy_node(node);
2314 
2315  pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
2316  rsc->id,
2317  pcmk__s(rsc->priv->history_id, "the same"),
2318  pcmk__node_name(n));
2319  g_hash_table_insert(iter->priv->probed_nodes,
2320  (gpointer) n->priv->id, n);
2321  }
2322  if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) {
2323  break;
2324  }
2325  iter = iter->priv->parent;
2326  }
2327  }
2328 
2329  /* If a managed resource is believed to be running, but node is down ... */
2330  if (known_active && !node->details->online && !node->details->maintenance
2331  && pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
2332 
2333  gboolean should_fence = FALSE;
2334 
2335  /* If this is a guest node, fence it (regardless of whether fencing is
2336  * enabled, because guest node fencing is done by recovery of the
2337  * container resource rather than by the fencer). Mark the resource
2338  * we're processing as failed. When the guest comes back up, its
2339  * operation history in the CIB will be cleared, freeing the affected
2340  * resource to run again once we are sure we know its state.
2341  */
2342  if (pcmk__is_guest_or_bundle_node(node)) {
2344  should_fence = TRUE;
2345 
2347  if (pcmk__is_remote_node(node)
2348  && (node->priv->remote != NULL)
2349  && !pcmk_is_set(node->priv->remote->flags,
2350  pcmk__rsc_failed)) {
2351 
2352  /* Setting unseen means that fencing of the remote node will
2353  * occur only if the connection resource is not going to start
2354  * somewhere. This allows connection resources on a failed
2355  * cluster node to move to another node without requiring the
2356  * remote nodes to be fenced as well.
2357  */
2359  reason = crm_strdup_printf("%s is active there (fencing will be"
2360  " revoked if remote connection can "
2361  "be re-established elsewhere)",
2362  rsc->id);
2363  }
2364  should_fence = TRUE;
2365  }
2366 
2367  if (should_fence) {
2368  if (reason == NULL) {
2369  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2370  }
2371  pe_fence_node(scheduler, node, reason, FALSE);
2372  }
2373  free(reason);
2374  }
2375 
2376  /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2377  save_on_fail = on_fail;
2378 
2379  if (node->details->unclean) {
2380  /* No extra processing needed
2381  * Also allows resources to be started again after a node is shot
2382  */
2383  on_fail = pcmk__on_fail_ignore;
2384  }
2385 
2386  switch (on_fail) {
2387  case pcmk__on_fail_ignore:
2388  /* nothing to do */
2389  break;
2390 
2391  case pcmk__on_fail_demote:
2393  demote_action(rsc, node, FALSE);
2394  break;
2395 
2397  /* treat it as if it is still running
2398  * but also mark the node as unclean
2399  */
2400  reason = crm_strdup_printf("%s failed there", rsc->id);
2401  pe_fence_node(scheduler, node, reason, FALSE);
2402  free(reason);
2403  break;
2404 
2406  pcmk__set_node_flags(node,
2408  break;
2409 
2410  case pcmk__on_fail_block:
2411  /* is_managed == FALSE will prevent any
2412  * actions being sent for the resource
2413  */
2416  break;
2417 
2418  case pcmk__on_fail_ban:
2419  /* make sure it comes up somewhere else
2420  * or not at all
2421  */
2423  "__action_migration_auto__", scheduler);
2424  break;
2425 
2426  case pcmk__on_fail_stop:
2429  break;
2430 
2431  case pcmk__on_fail_restart:
2432  if (known_active) {
2433  pcmk__set_rsc_flags(rsc,
2435  stop_action(rsc, node, FALSE);
2436  }
2437  break;
2438 
2441  if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) {
2442  /* A bundle's remote connection can run on a different node than
2443  * the bundle's container. We don't necessarily know where the
2444  * container is running yet, so remember it and add a stop
2445  * action for it later.
2446  */
 2447  scheduler->priv->stop_needed =
 2448  g_list_prepend(scheduler->priv->stop_needed,
2449  rsc->priv->launcher);
2450  } else if (rsc->priv->launcher != NULL) {
2451  stop_action(rsc->priv->launcher, node, FALSE);
2452  } else if (known_active) {
2453  stop_action(rsc, node, FALSE);
2454  }
2455  break;
2456 
2460  tmpnode = NULL;
2462  tmpnode = pcmk_find_node(scheduler, rsc->id);
2463  }
2464  if (pcmk__is_remote_node(tmpnode)
2465  && !pcmk_is_set(tmpnode->priv->flags,
2467  /* The remote connection resource failed in a way that
2468  * should result in fencing the remote node.
2469  */
2470  pe_fence_node(scheduler, tmpnode,
2471  "remote connection is unrecoverable", FALSE);
2472  }
2473  }
2474 
 2475  /* Require the stop action regardless of whether fencing is occurring. */
2476  if (known_active) {
2477  stop_action(rsc, node, FALSE);
2478  }
2479 
2480  /* if reconnect delay is in use, prevent the connection from exiting the
2481  * "STOPPED" role until the failure is cleared by the delay timeout. */
2482  if (rsc->priv->remote_reconnect_ms > 0U) {
2483  pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2484  }
2485  break;
2486  }
2487 
2488  /* Ensure a remote connection failure forces an unclean Pacemaker Remote
2489  * node to be fenced. By marking the node as seen, the failure will result
 2490  * in a fencing operation regardless of whether we're going to attempt to reconnect
2491  * in this transition.
2492  */
2493  if (pcmk_all_flags_set(rsc->flags,
2495  tmpnode = pcmk_find_node(scheduler, rsc->id);
2496  if (tmpnode && tmpnode->details->unclean) {
2498  }
2499  }
2500 
2501  if (known_active) {
2502  if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
2503  if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
2504  crm_notice("Removed resource %s is active on %s and will be "
2505  "stopped when possible",
2506  rsc->id, pcmk__node_name(node));
2507  } else {
2508  crm_notice("Removed resource %s must be stopped manually on %s "
2510  " is set to false", rsc->id, pcmk__node_name(node));
2511  }
2512  }
2513 
2514  native_add_running(rsc, node, scheduler,
2515  (save_on_fail != pcmk__on_fail_ignore));
2516  switch (on_fail) {
2517  case pcmk__on_fail_ignore:
2518  break;
2519  case pcmk__on_fail_demote:
2520  case pcmk__on_fail_block:
2522  break;
2523  default:
2524  pcmk__set_rsc_flags(rsc,
2526  break;
2527  }
2528 
2529  } else if ((rsc->priv->history_id != NULL)
2530  && (strchr(rsc->priv->history_id, ':') != NULL)) {
 2531  /* Only do this for older status sections that included instance numbers.
 2532  * Otherwise, stopped instances will appear as orphans.
 2533  */
2534  pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)",
2535  rsc->priv->history_id, rsc->id);
2536  free(rsc->priv->history_id);
2537  rsc->priv->history_id = NULL;
2538 
2539  } else {
2540  GList *possible_matches = pe__resource_actions(rsc, node,
2541  PCMK_ACTION_STOP, FALSE);
2542  GList *gIter = possible_matches;
2543 
2544  for (; gIter != NULL; gIter = gIter->next) {
2545  pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2546 
2548  }
2549 
2550  g_list_free(possible_matches);
2551  }
2552 
 2553  /* A successful stop after migrate_to on the migration source doesn't mean
 2554  * the partially migrated resource is stopped on the migration target.
 2555  */
2556  if ((rsc->priv->orig_role == pcmk_role_stopped)
2557  && (rsc->priv->active_nodes != NULL)
2558  && (rsc->priv->partial_migration_target != NULL)
2559  && pcmk__same_node(rsc->priv->partial_migration_source, node)) {
2560 
 2561  rsc->priv->orig_role = pcmk_role_started;
 2562  }
2563 }
2564 
2565 /* create active recurring operations as optional */
2566 static void
2567 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2568  int start_index, int stop_index,
2569  GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2570 {
2571  int counter = -1;
2572  const char *task = NULL;
2573  const char *status = NULL;
2574  GList *gIter = sorted_op_list;
2575 
2576  pcmk__assert(rsc != NULL);
2577  pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
2578  rsc->id, start_index, stop_index);
2579 
2580  for (; gIter != NULL; gIter = gIter->next) {
2581  xmlNode *rsc_op = (xmlNode *) gIter->data;
2582 
2583  guint interval_ms = 0;
2584  char *key = NULL;
2585  const char *id = pcmk__xe_id(rsc_op);
2586 
2587  counter++;
2588 
2589  if (node->details->online == FALSE) {
2590  pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
2591  rsc->id, pcmk__node_name(node));
2592  break;
2593 
2594  /* Need to check if there's a monitor for role="Stopped" */
2595  } else if (start_index < stop_index && counter <= stop_index) {
2596  pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2597  id, pcmk__node_name(node));
2598  continue;
2599 
2600  } else if (counter < start_index) {
2601  pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
2602  id, pcmk__node_name(node), counter);
2603  continue;
2604  }
2605 
2606  crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
2607  if (interval_ms == 0) {
2608  pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2609  id, pcmk__node_name(node));
2610  continue;
2611  }
2612 
2613  status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2614  if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2615  pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
2616  id, pcmk__node_name(node));
2617  continue;
2618  }
2619  task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2620  /* create the action */
2621  key = pcmk__op_key(rsc->id, task, interval_ms);
2622  pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
2623  custom_action(rsc, key, task, node, TRUE, scheduler);
2624  }
2625 }
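process_recurring() recreates the recurring monitors still active according to history as optional actions, so later stages can decide whether they need to be rescheduled. The actions are keyed with the usual "<resource>_<task>_<interval-in-ms>" operation key built by pcmk__op_key(). A tiny sketch of that key format (a sketch only, assuming that convention):

#include <stddef.h>
#include <stdio.h>

// Build an operation key such as "db_monitor_10000" (resource, action, interval in ms)
static void
example_op_key(char *buf, size_t len, const char *rsc, const char *task,
               unsigned int interval_ms)
{
    snprintf(buf, len, "%s_%s_%u", rsc, task, interval_ms);
}

int
main(void)
{
    char key[64];

    example_op_key(key, sizeof(key), "db", "monitor", 10000);
    printf("%s\n", key);   // prints: db_monitor_10000
    return 0;
}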
2626 
2627 void
2628 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2629  int *stop_index)
2630 {
2631  int counter = -1;
2632  int implied_monitor_start = -1;
2633  int implied_clone_start = -1;
2634  const char *task = NULL;
2635  const char *status = NULL;
2636 
2637  *stop_index = -1;
2638  *start_index = -1;
2639 
2640  for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2641  const xmlNode *rsc_op = (const xmlNode *) iter->data;
2642 
2643  counter++;
2644 
2645  task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
2646  status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
2647 
2648  if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2649  && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2650  *stop_index = counter;
2651 
2652  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2653  PCMK_ACTION_MIGRATE_FROM, NULL)) {
2654  *start_index = counter;
2655 
2656  } else if ((implied_monitor_start <= *stop_index)
2657  && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2658  pcmk__str_casei)) {
2659  const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
2660 
2661  if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2662  implied_monitor_start = counter;
2663  }
2664  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2665  PCMK_ACTION_DEMOTE, NULL)) {
2666  implied_clone_start = counter;
2667  }
2668  }
2669 
2670  if (*start_index == -1) {
2671  if (implied_clone_start != -1) {
2672  *start_index = implied_clone_start;
2673  } else if (implied_monitor_start != -1) {
2674  *start_index = implied_monitor_start;
2675  }
2676  }
2677 }
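calculate_active_ops() scans the call-ID-sorted history for the most recent successful stop and the most recent start (or migrate_from); when no explicit start is found, it falls back to a promote/demote or to a monitor that reported the resource running. A self-contained sketch of the same scan over a mocked history, using hypothetical types and plain task strings:

#include <stdio.h>
#include <string.h>

// Mocked history entry: task name plus its recorded result (0 = success)
struct mock_op {
    const char *task;
    int rc;
};

static void
example_active_ops(const struct mock_op *ops, int n_ops,
                   int *start_index, int *stop_index)
{
    int implied_monitor_start = -1;
    int implied_clone_start = -1;

    *stop_index = -1;
    *start_index = -1;

    for (int i = 0; i < n_ops; i++) {
        if ((strcmp(ops[i].task, "stop") == 0) && (ops[i].rc == 0)) {
            *stop_index = i;

        } else if ((strcmp(ops[i].task, "start") == 0)
                   || (strcmp(ops[i].task, "migrate_from") == 0)) {
            *start_index = i;

        } else if ((implied_monitor_start <= *stop_index)
                   && (strcmp(ops[i].task, "monitor") == 0)
                   && ((ops[i].rc == 0) || (ops[i].rc == 8))) {
            // 0 = running, 8 = running in the promoted role
            implied_monitor_start = i;

        } else if ((strcmp(ops[i].task, "promote") == 0)
                   || (strcmp(ops[i].task, "demote") == 0)) {
            implied_clone_start = i;
        }
    }

    if (*start_index == -1) {
        *start_index = (implied_clone_start != -1)? implied_clone_start
                                                  : implied_monitor_start;
    }
}

int
main(void)
{
    // Call-ID order: probe found it running, clean stop, later probe found it running again
    struct mock_op ops[] = { { "monitor", 0 }, { "stop", 0 }, { "monitor", 0 } };
    int start_index = 0;
    int stop_index = 0;

    example_active_ops(ops, 3, &start_index, &stop_index);
    printf("start=%d stop=%d\n", start_index, stop_index);  // prints: start=2 stop=1
    return 0;
}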
2678 
2679 // If resource history entry has shutdown lock, remember lock node and time
2680 static void
2681 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2682  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2683 {
2684  time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2685 
2687  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2688 
2689  if ((scheduler->priv->shutdown_lock_ms > 0U)
2691  > (lock_time + pcmk__timeout_ms2s(scheduler->priv->shutdown_lock_ms)))) {
2692  pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
2693  rsc->id, pcmk__node_name(node));
2694  pe__clear_resource_history(rsc, node);
2695  } else {
2696  rsc->priv->lock_node = node;
2697  rsc->priv->lock_time = lock_time;
2698  }
2699  }
2700 }
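unpack_shutdown_lock() keeps a resource locked to the node that was shut down while running it, unless the lock is older than the configured shutdown-lock-limit, in which case the resource's history on that node is cleared instead. A minimal sketch of the expiry arithmetic, in seconds and with hypothetical names:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

// Has a shutdown lock taken at lock_time expired, given a limit in seconds?
// A limit of 0 means the lock never expires.
static bool
example_lock_expired(time_t now, time_t lock_time, unsigned int limit_s)
{
    return (limit_s > 0) && (now > lock_time + (time_t) limit_s);
}

int
main(void)
{
    time_t lock_time = 1000;

    // Locked at t=1000 with a 600s limit: still held at t=1500, expired at t=1700
    printf("%d %d\n", example_lock_expired(1500, lock_time, 600),
           example_lock_expired(1700, lock_time, 600));   // prints: 0 1
    return 0;
}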
2701 
2712 static pcmk_resource_t *
2713 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
 2714  pcmk_scheduler_t *scheduler)
 2715 {
2716  GList *gIter = NULL;
2717  int stop_index = -1;
2718  int start_index = -1;
2719  enum rsc_role_e req_role = pcmk_role_unknown;
2720 
2721  const char *rsc_id = pcmk__xe_id(lrm_resource);
2722 
2723  pcmk_resource_t *rsc = NULL;
2724  GList *op_list = NULL;
2725  GList *sorted_op_list = NULL;
2726 
2727  xmlNode *rsc_op = NULL;
2728  xmlNode *last_failure = NULL;
2729 
2730  enum pcmk__on_fail on_fail = pcmk__on_fail_ignore;
2731  enum rsc_role_e saved_role = pcmk_role_unknown;
2732 
2733  if (rsc_id == NULL) {
2734  pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
2735  " entry: No " PCMK_XA_ID);
2736  crm_log_xml_info(lrm_resource, "missing-id");
2737  return NULL;
2738  }
2739  crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
2740  rsc_id, pcmk__node_name(node));
2741 
2742  /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
2743  * them
2744  */
2745  for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
2746  NULL);
2747  rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
2748 
2749  op_list = g_list_prepend(op_list, rsc_op);
2750  }
2751 
2753  if (op_list == NULL) {
2754  // If there are no operations, there is nothing to do
2755  return NULL;
2756  }
2757  }
2758 
2759  /* find the resource */
2760  rsc = unpack_find_resource(scheduler, node, rsc_id);
2761  if (rsc == NULL) {
2762  if (op_list == NULL) {
2763  // If there are no operations, there is nothing to do
2764  return NULL;
2765  } else {
2766  rsc = process_orphan_resource(lrm_resource, node, scheduler);
2767  }
2768  }
2769  pcmk__assert(rsc != NULL);
2770 
2771  // Check whether the resource is "shutdown-locked" to this node
2773  unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2774  }
2775 
2776  /* process operations */
2777  saved_role = rsc->priv->orig_role;
2779  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2780 
2781  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2782  xmlNode *rsc_op = (xmlNode *) gIter->data;
2783 
2784  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2785  }
2786 
2787  /* create active recurring operations as optional */
2788  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2789  process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2790  scheduler);
2791 
2792  /* no need to free the contents */
2793  g_list_free(sorted_op_list);
2794 
2795  process_rsc_state(rsc, node, on_fail);
2796 
2797  if (get_target_role(rsc, &req_role)) {
2798  if ((rsc->priv->next_role == pcmk_role_unknown)
2799  || (req_role < rsc->priv->next_role)) {
2800 
2801  pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
2802 
2803  } else if (req_role > rsc->priv->next_role) {
2804  pcmk__rsc_info(rsc,
2805  "%s: Not overwriting calculated next role %s"
2806  " with requested next role %s",
2807  rsc->id, pcmk_role_text(rsc->priv->next_role),
2808  pcmk_role_text(req_role));
2809  }
2810  }
2811 
2812  if (saved_role > rsc->priv->orig_role) {
2813  rsc->priv->orig_role = saved_role;
2814  }
2815 
2816  return rsc;
2817 }
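unpack_lrm_resource() collects every lrm_rsc_op entry for the resource and sorts the list with sort_op_by_callid() before replaying it, so the newest result for each operation is the one that determines the resource's state. A small sketch of the same idea with qsort() over mocked entries (the real comparator works on the XML and also handles pending operations and migration ordering):

#include <stdio.h>
#include <stdlib.h>

// Mocked history entry: operation key plus its executor call ID
struct mock_op {
    const char *id;
    int call_id;   // higher call ID means a more recent operation
};

// Ascending call-ID order, so the newest result is processed last and "wins"
static int
compare_call_id(const void *a, const void *b)
{
    const struct mock_op *op_a = a;
    const struct mock_op *op_b = b;

    return (op_a->call_id > op_b->call_id) - (op_a->call_id < op_b->call_id);
}

int
main(void)
{
    struct mock_op ops[] = {
        { "db_monitor_10000", 12 }, { "db_start_0", 8 }, { "db_stop_0", 3 },
    };

    qsort(ops, 3, sizeof(ops[0]), compare_call_id);
    for (int i = 0; i < 3; i++) {
        printf("%s (call %d)\n", ops[i].id, ops[i].call_id);
    }
    return 0;   // prints stop (3), then start (8), then monitor (12)
}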
2818 
2819 static void
2820 handle_removed_launched_resources(const xmlNode *lrm_rsc_list,
 2821  pcmk_scheduler_t *scheduler)
 2822 {
2823  for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list,
2825  NULL, NULL);
2826  rsc_entry != NULL;
2827  rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2828 
2829  pcmk_resource_t *rsc;
2830  pcmk_resource_t *launcher = NULL;
2831  const char *rsc_id;
2832  const char *launcher_id = NULL;
2833 
2834  launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
2835  rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
2836  if ((launcher_id == NULL) || (rsc_id == NULL)) {
2837  continue;
2838  }
2839 
2840  launcher = pe_find_resource(scheduler->priv->resources, launcher_id);
2841  if (launcher == NULL) {
2842  continue;
2843  }
2844 
2845  rsc = pe_find_resource(scheduler->priv->resources, rsc_id);
2846  if ((rsc == NULL) || (rsc->priv->launcher != NULL)
2848  continue;
2849  }
2850 
2851  pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s",
2852  rsc->id, launcher_id);
2853  rsc->priv->launcher = launcher;
2854  launcher->priv->launched = g_list_append(launcher->priv->launched,
2855  rsc);
2856  }
2857 }
2858 
2867 static void
2868 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
 2869  pcmk_scheduler_t *scheduler)
 2870 {
2871  bool found_removed_launched_resource = false;
2872 
2873  // Drill down to PCMK__XE_LRM_RESOURCES section
2874  xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
2875  if (xml == NULL) {
2876  return;
2877  }
2878  xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
2879  if (xml == NULL) {
2880  return;
2881  }
2882 
2883  // Unpack each PCMK__XE_LRM_RESOURCE entry
2884  for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
2886  NULL, NULL);
2887  rsc_entry != NULL;
2888  rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
2889 
2890  pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2891 
2892  if ((rsc != NULL)
2894  found_removed_launched_resource = true;
2895  }
2896  }
2897 
2898  /* Now that all resource state has been unpacked for this node, map any
2899  * removed launched resources to their launchers.
2900  */
2901  if (found_removed_launched_resource) {
2902  handle_removed_launched_resources(xml, scheduler);
2903  }
2904 }
2905 
2906 static void
2907 set_active(pcmk_resource_t *rsc)
2908 {
2909  const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2910 
 2911  if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) {
 2912  rsc->priv->orig_role = pcmk_role_unpromoted;
 2913  } else {
 2914  rsc->priv->orig_role = pcmk_role_started;
 2915  }
2916 }
2917 
2918 static void
2919 set_node_score(gpointer key, gpointer value, gpointer user_data)
2920 {
2921  pcmk_node_t *node = value;
2922  int *score = user_data;
2923 
2924  node->assign->score = *score;
2925 }
2926 
2927 #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
2928  "/" PCMK__XE_NODE_STATE
2929 #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \
2930  "/" PCMK__XE_LRM_RESOURCES \
2931  "/" PCMK__XE_LRM_RESOURCE
2932 #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
2933 
2934 static xmlNode *
2935 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2936  int target_rc, pcmk_scheduler_t *scheduler)
2937 {
2938  GString *xpath = NULL;
2939  xmlNode *xml = NULL;
2940 
2941  CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2942  return NULL);
2943 
2944  xpath = g_string_sized_new(256);
2945  pcmk__g_strcat(xpath,
2946  XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
2947  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
2948  SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
2949  NULL);
2950 
2951  /* Need to check against transition_magic too? */
2952  if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2953  pcmk__g_strcat(xpath,
2954  " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
2955  NULL);
2956 
2957  } else if ((source != NULL)
2958  && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2959  pcmk__g_strcat(xpath,
2960  " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
2961  NULL);
2962  } else {
2963  g_string_append_c(xpath, ']');
2964  }
2965 
2966  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2967  LOG_DEBUG);
2968  g_string_free(xpath, TRUE);
2969 
2970  if (xml && target_rc >= 0) {
2971  int rc = PCMK_OCF_UNKNOWN_ERROR;
2972  int status = PCMK_EXEC_ERROR;
2973 
 2974  crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
 2975  crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
 2976  if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2977  return NULL;
2978  }
2979  }
2980  return xml;
2981 }
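find_lrm_op() builds an XPath query that drills from the status section down to a particular lrm_rsc_op, optionally constraining the migration source or target attributes, and then verifies the recorded rc-code and op-status against the requested target rc. Assuming the usual CIB element names behind the macros above, the query for a monitor of "rsc1" on "node1" looks like the string printed by this sketch (illustration only; the real code builds it with a GString):

#include <stdio.h>

int
main(void)
{
    const char *node = "node1";
    const char *rsc = "rsc1";
    const char *op = "monitor";
    char xpath[256];

    // Mirrors XPATH_NODE_STATE + SUB_XPATH_LRM_RESOURCE + SUB_XPATH_LRM_RSC_OP
    snprintf(xpath, sizeof(xpath),
             "/cib/status/node_state[@uname='%s']"
             "/lrm/lrm_resources/lrm_resource[@id='%s']"
             "/lrm_rsc_op[@operation='%s']",
             node, rsc, op);
    printf("%s\n", xpath);
    return 0;
}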
2982 
2983 static xmlNode *
2984 find_lrm_resource(const char *rsc_id, const char *node_name,
 2985  pcmk_scheduler_t *scheduler)
 2986 {
2987  GString *xpath = NULL;
2988  xmlNode *xml = NULL;
2989 
2990  CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2991 
2992  xpath = g_string_sized_new(256);
2993  pcmk__g_strcat(xpath,
2994  XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
2995  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
2996  NULL);
2997 
2998  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2999  LOG_DEBUG);
3000 
3001  g_string_free(xpath, TRUE);
3002  return xml;
3003 }
3004 
3014 static bool
3015 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
3016 {
3017  bool result = false;
3018  xmlXPathObjectPtr search;
3019  char *xpath = NULL;
3020 
3021  xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
3022  SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
 3023  SUB_XPATH_LRM_RSC_OP
 3024  "[@" PCMK__XA_RC_CODE "!='%d']",
3025  node_name, rsc->id, PCMK_OCF_UNKNOWN);
3026 
3027  search = xpath_search(rsc->priv->scheduler->input, xpath);
3028  result = (numXpathResults(search) == 0);
3029  freeXpathObject(search);
3030  free(xpath);
3031  return result;
3032 }
3033 
3046 static bool
3047 monitor_not_running_after(const char *rsc_id, const char *node_name,
3048  const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3049 {
3050  /* Any probe/monitor operation on the node indicating it was not running
3051  * there
3052  */
3053  xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
 3054  NULL, PCMK_OCF_NOT_RUNNING, scheduler);
 3055 
3056  return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0);
3057 }
3058 
3071 static bool
3072 non_monitor_after(const char *rsc_id, const char *node_name,
3073  const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
3074 {
3075  xmlNode *lrm_resource = NULL;
3076 
3077  lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
3078  if (lrm_resource == NULL) {
3079  return false;
3080  }
3081 
3082  for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
3083  NULL, NULL);
3084  op != NULL; op = pcmk__xe_next(op, PCMK__XE_LRM_RSC_OP)) {
3085 
3086  const char * task = NULL;
3087 
3088  if (op == xml_op) {
3089  continue;
3090  }
3091 
3093 
3096  NULL)
3097  && pe__is_newer_op(op, xml_op) > 0) {
3098  return true;
3099  }
3100  }
3101 
3102  return false;
3103 }
3104 
3118 static bool
3119 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3120  const xmlNode *migrate_to,
3121  const xmlNode *migrate_from,
 3122  pcmk_scheduler_t *scheduler)
 3123 {
3124  const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to;
3125  const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
3126 
 3127  /* It's preferable to compare against the migration event on the same node,
 3128  * if one exists, since call IDs are more reliable.
 3129  */
3130  if ((xml_op != migrate_to) && (migrate_to != NULL)
3131  && pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3132 
3133  xml_op = migrate_to;
3134  }
3135 
3136  /* If there's any newer non-monitor operation on the node, or any newer
3137  * probe/monitor operation on the node indicating it was not running there,
3138  * the migration events potentially no longer matter for the node.
3139  */
3140  return non_monitor_after(rsc_id, node_name, xml_op, scheduler)
3141  || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler);
3142 }
3143 
3156 static int
3157 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3158  const pcmk_node_t *target_node,
3159  const char **source_name, const char **target_name)
3160 {
3161  *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
3162  *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
3163  if ((*source_name == NULL) || (*target_name == NULL)) {
3164  pcmk__config_err("Ignoring resource history entry %s without "
3166  PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
3167  return pcmk_rc_unpack_error;
3168  }
3169 
3170  if ((source_node != NULL)
3171  && !pcmk__str_eq(*source_name, source_node->priv->name,
3173  pcmk__config_err("Ignoring resource history entry %s because "
3174  PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
3175  pcmk__xe_id(entry), *source_name,
3176  pcmk__node_name(source_node));
3177  return pcmk_rc_unpack_error;
3178  }
3179 
3180  if ((target_node != NULL)
3181  && !pcmk__str_eq(*target_name, target_node->priv->name,
3183  pcmk__config_err("Ignoring resource history entry %s because "
3184  PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
3185  pcmk__xe_id(entry), *target_name,
3186  pcmk__node_name(target_node));
3187  return pcmk_rc_unpack_error;
3188  }
3189 
3190  return pcmk_rc_ok;
3191 }
3192 
3193 /*
3194  * \internal
3195  * \brief Add a migration source to a resource's list of dangling migrations
3196  *
3197  * If the migrate_to and migrate_from actions in a live migration both
3198  * succeeded, but there is no stop on the source, the migration is considered
3199  * "dangling." Add the source to the resource's dangling migration list, which
3200  * will be used to schedule a stop on the source without affecting the target.
3201  *
3202  * \param[in,out] rsc Resource involved in migration
3203  * \param[in] node Migration source
3204  */
3205 static void
3206 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3207 {
3208  pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3209  rsc->id, pcmk__node_name(node));
 3210 
 3211  rsc->priv->dangling_migration_sources =
 3212  g_list_prepend(rsc->priv->dangling_migration_sources,
3213  (gpointer) node);
3214 }
3215 
3222 static void
3223 unpack_migrate_to_success(struct action_history *history)
3224 {
3225  /* A complete migration sequence is:
3226  * 1. migrate_to on source node (which succeeded if we get to this function)
3227  * 2. migrate_from on target node
3228  * 3. stop on source node
3229  *
3230  * If no migrate_from has happened, the migration is considered to be
3231  * "partial". If the migrate_from succeeded but no stop has happened, the
3232  * migration is considered to be "dangling".
3233  *
3234  * If a successful migrate_to and stop have happened on the source node, we
3235  * still need to check for a partial migration, due to scenarios (easier to
3236  * produce with batch-limit=1) like:
3237  *
3238  * - A resource is migrating from node1 to node2, and a migrate_to is
3239  * initiated for it on node1.
3240  *
3241  * - node2 goes into standby mode while the migrate_to is pending, which
3242  * aborts the transition.
3243  *
3244  * - Upon completion of the migrate_to, a new transition schedules a stop
3245  * on both nodes and a start on node1.
3246  *
3247  * - If the new transition is aborted for any reason while the resource is
3248  * stopping on node1, the transition after that stop completes will see
3249  * the migrate_to and stop on the source, but it's still a partial
3250  * migration, and the resource must be stopped on node2 because it is
3251  * potentially active there due to the migrate_to.
3252  *
3253  * We also need to take into account that either node's history may be
3254  * cleared at any point in the migration process.
3255  */
3256  int from_rc = PCMK_OCF_OK;
3257  int from_status = PCMK_EXEC_PENDING;
3258  pcmk_node_t *target_node = NULL;
3259  xmlNode *migrate_from = NULL;
3260  const char *source = NULL;
3261  const char *target = NULL;
3262  bool source_newer_op = false;
3263  bool target_newer_state = false;
3264  bool active_on_target = false;
3265  pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3266 
3267  // Get source and target node names from XML
3268  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3269  &target) != pcmk_rc_ok) {
3270  return;
3271  }
3272 
3273  // Check for newer state on the source
3274  source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3275  scheduler);
3276 
3277  // Check for a migrate_from action from this source on the target
3278  migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3279  target, source, -1, scheduler);
3280  if (migrate_from != NULL) {
3281  if (source_newer_op) {
3282  /* There's a newer non-monitor operation on the source and a
3283  * migrate_from on the target, so this migrate_to is irrelevant to
3284  * the resource's state.
3285  */
3286  return;
3287  }
3288  crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
3289  crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
3290  }
3291 
3292  /* If the resource has newer state on both the source and target after the
3293  * migration events, this migrate_to is irrelevant to the resource's state.
3294  */
3295  target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3296  history->xml, migrate_from,
3297  scheduler);
3298  if (source_newer_op && target_newer_state) {
3299  return;
3300  }
3301 
3302  /* Check for dangling migration (migrate_from succeeded but stop not done).
3303  * We know there's no stop because we already returned if the target has a
3304  * migrate_from and the source has any newer non-monitor operation.
3305  */
3306  if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3307  add_dangling_migration(history->rsc, history->node);
3308  return;
3309  }
3310 
3311  /* Without newer state, this migrate_to implies the resource is active.
3312  * (Clones are not allowed to migrate, so role can't be promoted.)
3313  */
3314  history->rsc->priv->orig_role = pcmk_role_started;
3315 
3316  target_node = pcmk_find_node(scheduler, target);
3317  active_on_target = !target_newer_state && (target_node != NULL)
3318  && target_node->details->online;
3319 
3320  if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3321  if (active_on_target) {
3322  native_add_running(history->rsc, target_node, scheduler, TRUE);
3323  } else {
3324  // Mark resource as failed, require recovery, and prevent migration
3325  pcmk__set_rsc_flags(history->rsc,
3328  }
3329  return;
3330  }
3331 
3332  // The migrate_from is pending, complete but erased, or to be scheduled
3333 
3334  /* If there is no history at all for the resource on an online target, then
3335  * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3336  * have the probe result, it will be reflected in target_newer_state.
3337  */
3338  if ((target_node != NULL) && target_node->details->online
3339  && unknown_on_node(history->rsc, target)) {
3340  return;
3341  }
3342 
3343  if (active_on_target) {
3344  pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3345 
3346  native_add_running(history->rsc, target_node, scheduler, FALSE);
3347  if ((source_node != NULL) && source_node->details->online) {
3348  /* This is a partial migration: the migrate_to completed
3349  * successfully on the source, but the migrate_from has not
3350  * completed. Remember the source and target; if the newly
3351  * chosen target remains the same when we schedule actions
3352  * later, we may continue with the migration.
3353  */
3354  history->rsc->priv->partial_migration_target = target_node;
3355  history->rsc->priv->partial_migration_source = source_node;
3356  }
3357 
3358  } else if (!source_newer_op) {
3359  // Mark resource as failed, require recovery, and prevent migration
3360  pcmk__set_rsc_flags(history->rsc,
3363  }
3364 }
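The comment at the top of unpack_migrate_to_success() distinguishes three outcomes of a successful migrate_to: complete (migrate_from and stop both recorded), dangling (migrate_from succeeded but no stop on the source), and partial (no conclusive migrate_from, so the resource may still be active on both nodes and the target must be stopped). A compact sketch of that classification using plain booleans in place of the real history checks:

#include <stdbool.h>
#include <stdio.h>

enum migration_state { MIGRATION_COMPLETE, MIGRATION_DANGLING, MIGRATION_PARTIAL };

/* Classify a migration after a successful migrate_to, given whether a
 * successful migrate_from was recorded on the target and whether a stop was
 * recorded on the source (a simplification of the real history checks).
 */
static enum migration_state
example_classify(bool migrate_from_ok, bool stopped_on_source)
{
    if (migrate_from_ok) {
        return stopped_on_source? MIGRATION_COMPLETE : MIGRATION_DANGLING;
    }
    return MIGRATION_PARTIAL;   // the resource may still be active on both nodes
}

int
main(void)
{
    printf("%d %d %d\n",
           example_classify(true, true),     // 0: complete
           example_classify(true, false),    // 1: dangling (stop still needed on source)
           example_classify(false, false));  // 2: partial (target must be stopped too)
    return 0;
}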
3365 
3372 static void
3373 unpack_migrate_to_failure(struct action_history *history)
3374 {
3375  xmlNode *target_migrate_from = NULL;
3376  const char *source = NULL;
3377  const char *target = NULL;
3378  pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3379 
3380  // Get source and target node names from XML
3381  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3382  &target) != pcmk_rc_ok) {
3383  return;
3384  }
3385 
3386  /* If a migration failed, we have to assume the resource is active. Clones
3387  * are not allowed to migrate, so role can't be promoted.
3388  */
3389  history->rsc->priv->orig_role = pcmk_role_started;
3390 
3391  // Check for migrate_from on the target
3392  target_migrate_from = find_lrm_op(history->rsc->id,
3395 
3396  if (/* If the resource state is unknown on the target, it will likely be
3397  * probed there.
3398  * Don't just consider it running there. We will get back here anyway in
3399  * case the probe detects it's running there.
3400  */
3401  !unknown_on_node(history->rsc, target)
3402  /* If the resource has newer state on the target after the migration
3403  * events, this migrate_to no longer matters for the target.
3404  */
3405  && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3406  target_migrate_from, scheduler)) {
 3407  /* The resource has no newer state on the target,
 3408  * so assume it's still active there
 3409  * (if the node is up).
 3410  */
3411  pcmk_node_t *target_node = pcmk_find_node(scheduler, target);
3412 
3413  if (target_node && target_node->details->online) {
3414  native_add_running(history->rsc, target_node, scheduler, FALSE);
3415  }
3416 
3417  } else if (!non_monitor_after(history->rsc->id, source, history->xml,
3418  scheduler)) {
3419  /* We know the resource has newer state on the target, but this
3420  * migrate_to still matters for the source as long as there's no newer
3421  * non-monitor operation there.
3422  */
3423 
3424  // Mark node as having dangling migration so we can force a stop later
3425  history->rsc->priv->dangling_migration_sources =
3426  g_list_prepend(history->rsc->priv->dangling_migration_sources,
3427  (gpointer) history->node);
3428  }
3429 }
3430 
3437 static void
3438 unpack_migrate_from_failure(struct action_history *history)
3439 {
3440  xmlNode *source_migrate_to = NULL;
3441  const char *source = NULL;
3442  const char *target = NULL;
3443  pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3444 
3445  // Get source and target node names from XML
3446  if (get_migration_node_names(history->xml, NULL, history->node, &source,
3447  &target) != pcmk_rc_ok) {
3448  return;
3449  }
3450 
3451  /* If a migration failed, we have to assume the resource is active. Clones
3452  * are not allowed to migrate, so role can't be promoted.
3453  */
3454  history->rsc->priv->orig_role = pcmk_role_started;
3455 
3456  // Check for a migrate_to on the source
3457  source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3458  source, target, PCMK_OCF_OK, scheduler);
3459 
3460  if (/* If the resource state is unknown on the source, it will likely be
3461  * probed there.
3462  * Don't just consider it running there. We will get back here anyway in
3463  * case the probe detects it's running there.
3464  */
3465  !unknown_on_node(history->rsc, source)
3466  /* If the resource has newer state on the source after the migration
3467  * events, this migrate_from no longer matters for the source.
3468  */
3469  && !newer_state_after_migrate(history->rsc->id, source,
3470  source_migrate_to, history->xml,
3471  scheduler)) {
3472  /* The resource has no newer state on the source, so assume it's still
3473  * active there (if it is up).
3474  */
3475  pcmk_node_t *source_node = pcmk_find_node(scheduler, source);
3476 
3477  if (source_node && source_node->details->online) {
3478  native_add_running(history->rsc, source_node, scheduler, TRUE);
3479  }
3480  }
3481 }
3482 
3489 static void
3490 record_failed_op(struct action_history *history)
3491 {
3492  const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3493 
3494  if (!(history->node->details->online)) {
3495  return;
3496  }
3497 
3498  for (const xmlNode *xIter = scheduler->priv->failed->children;
3499  xIter != NULL; xIter = xIter->next) {
3500 
3501  const char *key = pcmk__xe_history_key(xIter);
3502  const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
3503 
3504  if (pcmk__str_eq(history->key, key, pcmk__str_none)
3505  && pcmk__str_eq(uname, history->node->priv->name,
3506  pcmk__str_casei)) {
3507  crm_trace("Skipping duplicate entry %s on %s",
3508  history->key, pcmk__node_name(history->node));
3509  return;
3510  }
3511  }
3512 
3513  crm_trace("Adding entry for %s on %s to failed action list",
3514  history->key, pcmk__node_name(history->node));
3515  crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
3516  crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
3517  pcmk__xml_copy(scheduler->priv->failed, history->xml);
3518 }
3519 
3520 static char *
3521 last_change_str(const xmlNode *xml_op)
3522 {
3523  time_t when;
3524  char *result = NULL;
3525 
 3526  if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
 3527  &when) == pcmk_ok) {
3528  char *when_s = pcmk__epoch2str(&when, 0);
3529  const char *p = strchr(when_s, ' ');
3530 
3531  // Skip day of week to make message shorter
3532  if ((p != NULL) && (*(++p) != '\0')) {
3533  result = pcmk__str_copy(p);
3534  }
3535  free(when_s);
3536  }
3537 
3538  if (result == NULL) {
3539  result = pcmk__str_copy("unknown_time");
3540  }
3541 
3542  return result;
3543 }
3544 
3551 static void
3552 ban_from_all_nodes(pcmk_resource_t *rsc)
3553 {
3554  int score = -PCMK_SCORE_INFINITY;
3555  const pcmk_scheduler_t *scheduler = rsc->priv->scheduler;
3556 
3557  if (rsc->priv->parent != NULL) {
 3558  pcmk_resource_t *parent = uber_parent(rsc);
 3559 
3560  if (pcmk__is_anonymous_clone(parent)) {
3561  /* For anonymous clones, if an operation with
3562  * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
3563  * entire clone must stop.
3564  */
3565  rsc = parent;
3566  }
3567  }
3568 
3569  // Ban the resource from all nodes
3570  crm_notice("%s will not be started under current conditions", rsc->id);
3571  if (rsc->priv->allowed_nodes != NULL) {
3572  g_hash_table_destroy(rsc->priv->allowed_nodes);
3573  }
3575  g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score);
3576 }
3577 
3586 static void
3587 unpack_failure_handling(struct action_history *history,
3588  enum pcmk__on_fail *on_fail,
3589  enum rsc_role_e *fail_role)
3590 {
3591  xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3592  history->interval_ms, true);
3593 
3594  GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3595  history->task,
3596  history->interval_ms, config);
3597 
3598  const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
3599 
3600  *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3601  history->interval_ms, on_fail_str);
3602  *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3603  meta);
3604  g_hash_table_destroy(meta);
3605 }
3606 
3617 static void
3618 unpack_rsc_op_failure(struct action_history *history,
3619  enum pcmk__on_fail config_on_fail,
3620  enum rsc_role_e fail_role, xmlNode **last_failure,
3621  enum pcmk__on_fail *on_fail)
3622 {
3623  bool is_probe = false;
3624  char *last_change_s = NULL;
3625  pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
3626 
3627  *last_failure = history->xml;
3628 
3629  is_probe = pcmk_xe_is_probe(history->xml);
3630  last_change_s = last_change_str(history->xml);
3631 
3633  && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3634  crm_trace("Unexpected result (%s%s%s) was recorded for "
3635  "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s",
3636  crm_exit_str(history->exit_status),
3637  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3638  pcmk__s(history->exit_reason, ""),
3639  (is_probe? "probe" : history->task), history->rsc->id,
3640  pcmk__node_name(history->node), last_change_s,
3641  history->exit_status, history->id);
3642  } else {
3644  "Unexpected result (%s%s%s) was recorded for %s of "
3645  "%s on %s at %s " QB_XS " exit-status=%d id=%s",
3646  crm_exit_str(history->exit_status),
3647  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3648  pcmk__s(history->exit_reason, ""),
3649  (is_probe? "probe" : history->task), history->rsc->id,
3650  pcmk__node_name(history->node), last_change_s,
3651  history->exit_status, history->id);
3652 
3653  if (is_probe && (history->exit_status != PCMK_OCF_OK)
3654  && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3655  && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3656 
3657  /* A failed (not just unexpected) probe result could mean the user
3658  * didn't know resources will be probed even where they can't run.
3659  */
3660  crm_notice("If it is not possible for %s to run on %s, see "
3661  "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
3662  "constraints",
3663  history->rsc->id, pcmk__node_name(history->node));
3664  }
3665 
3666  record_failed_op(history);
3667  }
3668 
3669  free(last_change_s);
3670 
3671  if (*on_fail < config_on_fail) {
3672  pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3673  pcmk__on_fail_text(*on_fail),
3674  pcmk__on_fail_text(config_on_fail), history->key);
3675  *on_fail = config_on_fail;
3676  }
3677 
3678  if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3679  resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
3680  "__stop_fail__", scheduler);
3681 
3682  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3683  unpack_migrate_to_failure(history);
3684 
3685  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3686  unpack_migrate_from_failure(history);
3687 
3688  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3689  history->rsc->priv->orig_role = pcmk_role_promoted;
3690 
3691  } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3692  if (config_on_fail == pcmk__on_fail_block) {
3693  history->rsc->priv->orig_role = pcmk_role_promoted;
3694  pe__set_next_role(history->rsc, pcmk_role_stopped,
3695  "demote with " PCMK_META_ON_FAIL "=block");
3696 
3697  } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3698  history->rsc->priv->orig_role = pcmk_role_stopped;
3699 
3700  } else {
3701  /* Staying in the promoted role would put the scheduler and
3702  * controller into a loop. Setting the role to unpromoted is not
3703  * dangerous because the resource will be stopped as part of
3704  * recovery, and any promotion will be ordered after that stop.
3705  */
3706  history->rsc->priv->orig_role = pcmk_role_unpromoted;
3707  }
3708  }
3709 
3710  if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3711  /* leave stopped */
3712  pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3713  history->rsc->priv->orig_role = pcmk_role_stopped;
3714 
3715  } else if (history->rsc->priv->orig_role < pcmk_role_started) {
3716  pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3717  set_active(history->rsc);
3718  }
3719 
3720  pcmk__rsc_trace(history->rsc,
3721  "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
3722  history->rsc->id,
3723  pcmk_role_text(history->rsc->priv->orig_role),
3724  pcmk__btoa(history->node->details->unclean),
3725  pcmk__on_fail_text(config_on_fail),
3726  pcmk_role_text(fail_role));
3727 
3728  if ((fail_role != pcmk_role_started)
3729  && (history->rsc->priv->next_role < fail_role)) {
3730  pe__set_next_role(history->rsc, fail_role, "failure");
3731  }
3732 
3733  if (fail_role == pcmk_role_stopped) {
3734  ban_from_all_nodes(history->rsc);
3735  }
3736 }
3737 
3747 static void
3748 block_if_unrecoverable(struct action_history *history)
3749 {
3750  char *last_change_s = NULL;
3751 
3752  if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3753  return; // All actions besides stop are always recoverable
3754  }
3755  if (pe_can_fence(history->node->priv->scheduler, history->node)) {
3756  return; // Failed stops are recoverable via fencing
3757  }
3758 
3759  last_change_s = last_change_str(history->xml);
3760  pcmk__sched_err(history->node->priv->scheduler,
3761  "No further recovery can be attempted for %s "
3762  "because %s on %s failed (%s%s%s) at %s "
3763  QB_XS " rc=%d id=%s",
3764  history->rsc->id, history->task,
3765  pcmk__node_name(history->node),
3766  crm_exit_str(history->exit_status),
3767  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3768  pcmk__s(history->exit_reason, ""),
3769  last_change_s, history->exit_status, history->id);
3770 
3771  free(last_change_s);
3772 
3774  pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked);
3775 }
3776 
3786 static inline void
3787 remap_because(struct action_history *history, const char **why, int value,
3788  const char *reason)
3789 {
3790  if (history->execution_status != value) {
3791  history->execution_status = value;
3792  *why = reason;
3793  }
3794 }
3795 
3818 static void
3819 remap_operation(struct action_history *history,
3820  enum pcmk__on_fail *on_fail, bool expired)
3821 {
3822  bool is_probe = false;
3823  int orig_exit_status = history->exit_status;
3824  int orig_exec_status = history->execution_status;
3825  const char *why = NULL;
3826  const char *task = history->task;
3827 
3828  // Remap degraded results to their successful counterparts
3829  history->exit_status = pcmk__effective_rc(history->exit_status);
3830  if (history->exit_status != orig_exit_status) {
3831  why = "degraded result";
3832  if (!expired && (!history->node->details->shutdown
3833  || history->node->details->online)) {
3834  record_failed_op(history);
3835  }
3836  }
3837 
3838  if (!pcmk__is_bundled(history->rsc)
3839  && pcmk_xe_mask_probe_failure(history->xml)
3840  && ((history->execution_status != PCMK_EXEC_DONE)
3841  || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3842  history->execution_status = PCMK_EXEC_DONE;
3843  history->exit_status = PCMK_OCF_NOT_RUNNING;
3844  why = "equivalent probe result";
3845  }
3846 
3847  /* If the executor reported an execution status of anything but done or
3848  * error, consider that final. But for done or error, we know better whether
3849  * it should be treated as a failure or not, because we know the expected
3850  * result.
3851  */
3852  switch (history->execution_status) {
3853  case PCMK_EXEC_DONE:
3854  case PCMK_EXEC_ERROR:
3855  break;
3856 
3857  // These should be treated as node-fatal
 3858  case PCMK_EXEC_NO_FENCE_DEVICE:
 3859  case PCMK_EXEC_NO_SECRETS:
3860  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3861  "node-fatal error");
3862  goto remap_done;
3863 
3864  default:
3865  goto remap_done;
3866  }
3867 
3868  is_probe = pcmk_xe_is_probe(history->xml);
3869  if (is_probe) {
3870  task = "probe";
3871  }
3872 
3873  if (history->expected_exit_status < 0) {
3874  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3875  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3876  * expected exit status in the transition key, which (along with the
3877  * similar case of a corrupted transition key in the CIB) will be
3878  * reported to this function as -1. Pacemaker 2.0+ does not support
3879  * rolling upgrades from those versions or processing of saved CIB files
3880  * from those versions, so we do not need to care much about this case.
3881  */
3882  remap_because(history, &why, PCMK_EXEC_ERROR,
3883  "obsolete history format");
3884  pcmk__config_warn("Expected result not found for %s on %s "
3885  "(corrupt or obsolete CIB?)",
3886  history->key, pcmk__node_name(history->node));
3887 
3888  } else if (history->exit_status == history->expected_exit_status) {
3889  remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3890 
3891  } else {
3892  remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3893  pcmk__rsc_debug(history->rsc,
3894  "%s on %s: expected %d (%s), got %d (%s%s%s)",
3895  history->key, pcmk__node_name(history->node),
3896  history->expected_exit_status,
3897  crm_exit_str(history->expected_exit_status),
3898  history->exit_status,
3899  crm_exit_str(history->exit_status),
3900  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3901  pcmk__s(history->exit_reason, ""));
3902  }
3903 
3904  switch (history->exit_status) {
3905  case PCMK_OCF_OK:
3906  if (is_probe
3907  && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3908  char *last_change_s = last_change_str(history->xml);
3909 
3910  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3911  pcmk__rsc_info(history->rsc,
3912  "Probe found %s active on %s at %s",
3913  history->rsc->id, pcmk__node_name(history->node),
3914  last_change_s);
3915  free(last_change_s);
3916  }
3917  break;
3918 
3919  case PCMK_OCF_NOT_RUNNING:
3920  if (is_probe
3921  || (history->expected_exit_status == history->exit_status)
3922  || !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) {
3923 
3924  /* For probes, recurring monitors for the Stopped role, and
3925  * unmanaged resources, "not running" is not considered a
3926  * failure.
3927  */
3928  remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3929  history->rsc->priv->orig_role = pcmk_role_stopped;
3930  *on_fail = pcmk__on_fail_ignore;
3931  pe__set_next_role(history->rsc, pcmk_role_unknown,
3932  "not running");
3933  }
3934  break;
3935 
 3936  case PCMK_OCF_RUNNING_PROMOTED:
 3937  if (is_probe
3938  && (history->exit_status != history->expected_exit_status)) {
3939  char *last_change_s = last_change_str(history->xml);
3940 
3941  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3942  pcmk__rsc_info(history->rsc,
3943  "Probe found %s active and promoted on %s at %s",
3944  history->rsc->id,
3945  pcmk__node_name(history->node), last_change_s);
3946  free(last_change_s);
3947  }
3948  if (!expired
3949  || (history->exit_status == history->expected_exit_status)) {
3950  history->rsc->priv->orig_role = pcmk_role_promoted;
3951  }
3952  break;
3953 
3954  case PCMK_OCF_FAILED_PROMOTED:
3955  if (!expired) {
3956  history->rsc->priv->orig_role = pcmk_role_promoted;
3957  }
3958  remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
3959  break;
3960 
3961  case PCMK_OCF_NOT_CONFIGURED:
3962  remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
3963  break;
3964 
3965  case PCMK_OCF_UNIMPLEMENT_FEATURE:
3966  {
3967  guint interval_ms = 0;
3968  crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
3969  &interval_ms);
3970 
3971  if (interval_ms == 0) {
3972  if (!expired) {
3973  block_if_unrecoverable(history);
3974  }
3975  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3976  "exit status");
3977  } else {
3978  remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
3979  "exit status");
3980  }
3981  }
3982  break;
3983 
3984  case PCMK_OCF_NOT_INSTALLED:
3985  case PCMK_OCF_INVALID_PARAM:
3986  case PCMK_OCF_INSUFFICIENT_PRIV:
3987  if (!expired) {
3988  block_if_unrecoverable(history);
3989  }
3990  remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
3991  break;
3992 
3993  default:
3994  if (history->execution_status == PCMK_EXEC_DONE) {
3995  char *last_change_s = last_change_str(history->xml);
3996 
3997  crm_info("Treating unknown exit status %d from %s of %s "
3998  "on %s at %s as failure",
3999  history->exit_status, task, history->rsc->id,
4000  pcmk__node_name(history->node), last_change_s);
4001  remap_because(history, &why, PCMK_EXEC_ERROR,
4002  "unknown exit status");
4003  free(last_change_s);
4004  }
4005  break;
4006  }
4007 
4008 remap_done:
4009  if (why != NULL) {
4010  pcmk__rsc_trace(history->rsc,
4011  "Remapped %s result from [%s: %s] to [%s: %s] "
4012  "because of %s",
4013  history->key, pcmk_exec_status_str(orig_exec_status),
4014  crm_exit_str(orig_exit_status),
4015  pcmk_exec_status_str(history->execution_status),
4016  crm_exit_str(history->exit_status), why);
4017  }
4018 }
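/* Editorial note (not part of unpack.c): after remap_operation(), a history
 * entry is effectively characterized by the pair (execution status, exit
 * status). For example, a recurring monitor that returned PCMK_OCF_NOT_RUNNING
 * when PCMK_OCF_OK was expected ends up as (PCMK_EXEC_ERROR,
 * PCMK_OCF_NOT_RUNNING) and is treated as a failure, while a probe that finds
 * the resource stopped is remapped to (PCMK_EXEC_DONE, PCMK_OCF_NOT_RUNNING)
 * and is not.
 */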
4019 
4020 // Return TRUE if this is a last failure of a start or monitor whose parameters changed
4021 static bool
4022 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4023  pcmk_resource_t *rsc, pcmk_node_t *node)
4024 {
4025  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4026  if (pe__bundle_needs_remote_name(rsc)) {
4027  /* We haven't allocated resources yet, so we can't reliably
4028  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4029  * When that's needed, defer the check until later.
4030  */
4031  pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4032  rsc->priv->scheduler);
4033 
4034  } else {
4035  pcmk__op_digest_t *digest_data = NULL;
4036 
4037  digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4038  rsc->priv->scheduler);
4039  switch (digest_data->rc) {
4040  case pcmk__digest_unknown:
4041  crm_trace("Resource %s history entry %s on %s"
4042  " has no digest to compare",
4043  rsc->id, pcmk__xe_history_key(xml_op),
4044  node->priv->id);
4045  break;
4046  case pcmk__digest_match:
4047  break;
4048  default:
4049  return TRUE;
4050  }
4051  }
4052  }
4053  return FALSE;
4054 }
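/* Editorial note (not part of unpack.c): rsc_action_digest_cmp() recalculates
 * the operation's parameter digests from the current configuration and
 * compares them with the digests recorded in the history entry. Anything other
 * than pcmk__digest_match or pcmk__digest_unknown means the parameters have
 * changed since the failure, so the failure is cleared and the action gets a
 * fresh chance with the new configuration.
 */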
4055 
4056 // Order action after fencing of remote node, given connection rsc
4057 static void
4058 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4059  pcmk_scheduler_t *scheduler)
4060 {
4061  pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
4062 
4063  if (remote_node) {
4064  pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4065  FALSE, scheduler);
4066 
4067  order_actions(fence, action, pcmk__ar_ordered);
4068  }
4069 }
4070 
4071 static bool
4072 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4073  guint interval_ms, bool is_last_failure)
4074 {
4075  /* Clearing failures of recurring monitors has special concerns. The
4076  * executor reports only changes in the monitor result, so if the
4077  * monitor is still active and still getting the same failure result,
4078  * that will go undetected after the failure is cleared.
4079  *
4080  * Also, the operation history will have the time when the recurring
4081  * monitor result changed to the given code, not the time when the
4082  * result last happened.
4083  *
4084  * @TODO We probably should clear such failures only when the failure
4085  * timeout has passed since the last occurrence of the failed result.
4086  * However we don't record that information. We could maybe approximate
4087  * that by clearing only if there is a more recent successful monitor or
4088  * stop result, but we don't even have that information at this point
4089  * since we are still unpacking the resource's operation history.
4090  *
4091  * This is especially important for remote connection resources with a
4092  * reconnect interval, so in that case, we skip clearing failures
4093  * if the remote node hasn't been fenced.
4094  */
4095  if ((rsc->priv->remote_reconnect_ms > 0U)
4096  && pcmk_is_set(rsc->priv->scheduler->flags,
4097  pcmk__sched_fencing_enabled)
4098  && (interval_ms != 0)
4099  && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4100 
4101  pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
4102  rsc->id);
4103 
4104  if (remote_node && !pcmk_is_set(remote_node->priv->flags,
4105  pcmk__node_remote_fenced)) {
4106  if (is_last_failure) {
4107  crm_info("Waiting to clear monitor failure for remote node %s"
4108  " until fencing has occurred", rsc->id);
4109  }
4110  return TRUE;
4111  }
4112  }
4113  return FALSE;
4114 }
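/* Editorial note (not part of unpack.c): as a concrete example of the above,
 * consider a remote connection resource with a reconnect interval and a failed
 * recurring monitor. Because the executor reports only result changes,
 * clearing that failure before the remote node has been fenced could leave a
 * still-failing monitor undetected, so the failure is kept until fencing of
 * the node has been recorded.
 */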
4115 
4134 static bool
4135 check_operation_expiry(struct action_history *history)
4136 {
4137  bool expired = false;
4138  bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4139  time_t last_run = 0;
4140  int unexpired_fail_count = 0;
4141  const char *clear_reason = NULL;
4142  const guint expiration_sec =
4143  pcmk__timeout_ms2s(history->rsc->priv->failure_expiration_ms);
4144  pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler;
4145 
4146  if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4147  pcmk__rsc_trace(history->rsc,
4148  "Resource history entry %s on %s is not expired: "
4149  "Not Installed does not expire",
4150  history->id, pcmk__node_name(history->node));
4151  return false; // "Not installed" must always be cleared manually
4152  }
4153 
4154  if ((expiration_sec > 0)
4155  && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
4156  &last_run) == 0)) {
4157 
4158  /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
4159  * timestamp
4160  */
4161 
4162  time_t now = get_effective_time(scheduler);
4163  time_t last_failure = 0;
4164 
4165  // Is this particular operation history older than the failure timeout?
4166  if ((now >= (last_run + expiration_sec))
4167  && !should_ignore_failure_timeout(history->rsc, history->task,
4168  history->interval_ms,
4169  is_last_failure)) {
4170  expired = true;
4171  }
4172 
4173  // Does the resource as a whole have an unexpired fail count?
4174  unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4175  &last_failure,
4176  pcmk__fc_effective,
4177  history->xml);
4178 
4179  // Update scheduler recheck time according to *last* failure
4180  crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d "
4181  "expiration=%s last-failure@%lld",
4182  history->id, (long long) last_run, (expired? "" : "not "),
4183  (long long) now, unexpired_fail_count,
4184  pcmk__readable_interval(expiration_sec * 1000),
4185  (long long) last_failure);
4186  last_failure += expiration_sec + 1;
4187  if (unexpired_fail_count && (now < last_failure)) {
4188  pe__update_recheck_time(last_failure, scheduler,
4189  "fail count expiration");
4190  }
4191  }
4192 
4193  if (expired) {
4194  if (pe_get_failcount(history->node, history->rsc, NULL,
4195  pcmk__fc_default, history->xml)) {
4196  // There is a fail count ignoring timeout
4197 
4198  if (unexpired_fail_count == 0) {
4199  // There is no fail count considering timeout
4200  clear_reason = "it expired";
4201 
4202  } else {
4203  /* This operation is old, but there is an unexpired fail count.
4204  * In a properly functioning cluster, this should only be
4205  * possible if this operation is not a failure (otherwise the
4206  * fail count should be expired too), so this is really just a
4207  * failsafe.
4208  */
4209  pcmk__rsc_trace(history->rsc,
4210  "Resource history entry %s on %s is not "
4211  "expired: Unexpired fail count",
4212  history->id, pcmk__node_name(history->node));
4213  expired = false;
4214  }
4215 
4216  } else if (is_last_failure
4217  && (history->rsc->priv->remote_reconnect_ms > 0U)) {
4218  /* Clear any expired last failure when reconnect interval is set,
4219  * even if there is no fail count.
4220  */
4221  clear_reason = "reconnect interval is set";
4222  }
4223  }
4224 
4225  if (!expired && is_last_failure
4226  && should_clear_for_param_change(history->xml, history->task,
4227  history->rsc, history->node)) {
4228  clear_reason = "resource parameters have changed";
4229  }
4230 
4231  if (clear_reason != NULL) {
4232  pcmk_action_t *clear_op = NULL;
4233 
4234  // Schedule clearing of the fail count
4235  clear_op = pe__clear_failcount(history->rsc, history->node,
4236  clear_reason, scheduler);
4237 
4238  if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)
4239  && (history->rsc->priv->remote_reconnect_ms > 0)) {
4240  /* If we're clearing a remote connection due to a reconnect
4241  * interval, we want to wait until any scheduled fencing
4242  * completes.
4243  *
4244  * We could limit this to remote_node->details->unclean, but at
4245  * this point, that's always true (it won't be reliable until
4246  * after unpack_node_history() is done).
4247  */
4248  crm_info("Clearing %s failure will wait until any scheduled "
4249  "fencing of %s completes",
4250  history->task, history->rsc->id);
4251  order_after_remote_fencing(clear_op, history->rsc, scheduler);
4252  }
4253  }
4254 
4255  if (expired && (history->interval_ms == 0)
4256  && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4257  switch (history->exit_status) {
4258  case PCMK_OCF_OK:
4259  case PCMK_OCF_NOT_RUNNING:
4260  case PCMK_OCF_RUNNING_PROMOTED:
4261  case PCMK_OCF_DEGRADED:
4262  case PCMK_OCF_DEGRADED_PROMOTED:
4263  // Don't expire probes that return these values
4264  pcmk__rsc_trace(history->rsc,
4265  "Resource history entry %s on %s is not "
4266  "expired: Probe result",
4267  history->id, pcmk__node_name(history->node));
4268  expired = false;
4269  break;
4270  }
4271  }
4272 
4273  return expired;
4274 }
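/* Editorial sketch (not part of unpack.c): assuming a hypothetical
 * PCMK_META_FAILURE_TIMEOUT of 600s, the expiry logic above roughly reduces
 * to:
 *
 *     expired = (now >= last_run + 600)          // entry is older than the timeout
 *               && (unexpired_fail_count == 0);  // and no unexpired failures remain
 *
 * and, while an unexpired fail count remains, a cluster recheck is requested
 * at last_failure + 600 + 1 so that expiration is re-evaluated on time.
 */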
4275 
4276 int
4277 pe__target_rc_from_xml(const xmlNode *xml_op)
4278 {
4279  int target_rc = 0;
4280  const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
4281 
4282  if (key == NULL) {
4283  return -1;
4284  }
4285  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4286  return target_rc;
4287 }
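/* Editorial sketch (not part of unpack.c): the transition key stored in
 * PCMK__XA_TRANSITION_KEY packs several colon-separated fields, one of which
 * is the expected (target) return code. With the hypothetical key below, the
 * decoded target_rc would be 0:
 *
 *     int target_rc = 0;
 *     decode_transition_key("3:17:0:0b1f2a34-aaaa-bbbb-cccc-1234567890ab",
 *                           NULL, NULL, NULL, &target_rc);
 */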
4288 
4298 static void
4299 update_resource_state(struct action_history *history, int exit_status,
4300  const xmlNode *last_failure,
4301  enum pcmk__on_fail *on_fail)
4302 {
4303  bool clear_past_failure = false;
4304 
4305  if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4306  || (!pcmk__is_bundled(history->rsc)
4307  && pcmk_xe_mask_probe_failure(history->xml))) {
4308  history->rsc->priv->orig_role = pcmk_role_stopped;
4309 
4310  } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4311  clear_past_failure = true;
4312 
4313  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4314  pcmk__str_none)) {
4315  if ((last_failure != NULL)
4316  && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
4317  pcmk__str_none)) {
4318  clear_past_failure = true;
4319  }
4320  if (history->rsc->priv->orig_role < pcmk_role_started) {
4321  set_active(history->rsc);
4322  }
4323 
4324  } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4325  history->rsc->priv->orig_role = pcmk_role_started;
4326  clear_past_failure = true;
4327 
4328  } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4329  history->rsc->priv->orig_role = pcmk_role_stopped;
4330  clear_past_failure = true;
4331 
4332  } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4333  pcmk__str_none)) {
4334  history->rsc->priv->orig_role = pcmk_role_promoted;
4335  clear_past_failure = true;
4336 
4337  } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4338  pcmk__str_none)) {
4339  if (*on_fail == pcmk__on_fail_demote) {
4340  /* Demote clears an error only if
4341  * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
4342  */
4343  clear_past_failure = true;
4344  }
4345  history->rsc->priv->orig_role = pcmk_role_unpromoted;
4346 
4347  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4348  pcmk__str_none)) {
4349  history->rsc->priv->orig_role = pcmk_role_started;
4350  clear_past_failure = true;
4351 
4352  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4353  pcmk__str_none)) {
4354  unpack_migrate_to_success(history);
4355 
4356  } else if (history->rsc->priv->orig_role < pcmk_role_started) {
4357  pcmk__rsc_trace(history->rsc, "%s active on %s",
4358  history->rsc->id, pcmk__node_name(history->node));
4359  set_active(history->rsc);
4360  }
4361 
4362  if (!clear_past_failure) {
4363  return;
4364  }
4365 
4366  switch (*on_fail) {
4367  case pcmk__on_fail_stop:
4368  case pcmk__on_fail_ban:
4369  case pcmk__on_fail_standby_node:
4370  case pcmk__on_fail_fence_node:
4371  pcmk__rsc_trace(history->rsc,
4372  "%s (%s) is not cleared by a completed %s",
4373  history->rsc->id, pcmk__on_fail_text(*on_fail),
4374  history->task);
4375  break;
4376 
4377  case pcmk__on_fail_block:
4378  case pcmk__on_fail_ignore:
4379  case pcmk__on_fail_demote:
4380  case pcmk__on_fail_restart:
4381  case pcmk__on_fail_restart_container:
4382  *on_fail = pcmk__on_fail_ignore;
4383  pe__set_next_role(history->rsc, pcmk_role_unknown,
4384  "clear past failures");
4385  break;
4386 
4387  case pcmk__on_fail_reset_remote:
4388  if (history->rsc->priv->remote_reconnect_ms == 0U) {
4389  /* With no reconnect interval, the connection is allowed to
4390  * start again after the remote node is fenced and
4391  * completely stopped. (With a reconnect interval, we wait
4392  * for the failure to be cleared entirely before attempting
4393  * to reconnect.)
4394  */
4395  *on_fail = pcmk__on_fail_ignore;
4396  pe__set_next_role(history->rsc, pcmk_role_unknown,
4397  "clear past failures and reset remote");
4398  }
4399  break;
4400  }
4401 }
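/* Editorial note (not part of unpack.c): the task-to-role mapping above is the
 * heart of status unpacking: a completed start or migrate_from leaves the
 * resource Started, promote leaves it Promoted, demote leaves it Unpromoted,
 * and stop (or a "not running" result) leaves it Stopped. Whether a successful
 * result also resets a pending on-fail handling depends on that handling, as
 * the final switch shows: stop/ban/standby/fence directives are preserved,
 * while ignore/restart-style directives are reset.
 */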
4402 
4411 static inline bool
4412 can_affect_state(struct action_history *history)
4413 {
4414  return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4415  PCMK_ACTION_START, PCMK_ACTION_STOP,
4416  PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4417  PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4418  "asyncmon", NULL);
4419 }
4420 
4429 static int
4430 unpack_action_result(struct action_history *history)
4431 {
4432  if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
4433  &(history->execution_status)) < 0)
4434  || (history->execution_status < PCMK_EXEC_PENDING)
4435  || (history->execution_status > PCMK_EXEC_MAX)
4436  || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4437  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4438  "with invalid " PCMK__XA_OP_STATUS " '%s'",
4439  history->id, history->rsc->id,
4440  pcmk__node_name(history->node),
4441  pcmk__s(crm_element_value(history->xml,
4442  PCMK__XA_OP_STATUS),
4443  ""));
4444  return pcmk_rc_unpack_error;
4445  }
4446  if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
4447  &(history->exit_status)) < 0)
4448  || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4449  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4450  "with invalid " PCMK__XA_RC_CODE " '%s'",
4451  history->id, history->rsc->id,
4452  pcmk__node_name(history->node),
4453  pcmk__s(crm_element_value(history->xml,
4454  PCMK__XA_RC_CODE),
4455  ""));
4456  return pcmk_rc_unpack_error;
4457  }
4458  history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
4459  return pcmk_rc_ok;
4460 }
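/* Editorial note (not part of unpack.c): unpack_action_result() validates the
 * two numeric fields every usable history entry must carry: the executor's
 * execution status (PCMK__XA_OP_STATUS) and the agent's exit code
 * (PCMK__XA_RC_CODE). An entry whose status is outside the known PCMK_EXEC_*
 * range (or records a cancelled action), or whose exit code is negative or
 * above CRM_EX_MAX, is rejected with a configuration error rather than
 * guessed at.
 */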
4461 
4472 static int
4473 process_expired_result(struct action_history *history, int orig_exit_status)
4474 {
4475  if (!pcmk__is_bundled(history->rsc)
4476  && pcmk_xe_mask_probe_failure(history->xml)
4477  && (orig_exit_status != history->expected_exit_status)) {
4478 
4479  if (history->rsc->priv->orig_role <= pcmk_role_stopped) {
4480  history->rsc->priv->orig_role = pcmk_role_unknown;
4481  }
4482  crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4483  "Masked failure expired",
4484  history->id, history->rsc->id,
4485  pcmk__node_name(history->node));
4486  return pcmk_rc_ok;
4487  }
4488 
4489  if (history->exit_status == history->expected_exit_status) {
4490  return pcmk_rc_undetermined; // Only failures expire
4491  }
4492 
4493  if (history->interval_ms == 0) {
4494  crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4495  "Expired failure",
4496  history->id, history->task, history->rsc->id,
4497  pcmk__node_name(history->node));
4498  return pcmk_rc_ok;
4499  }
4500 
4501  if (history->node->details->online && !history->node->details->unclean) {
4502  /* Reschedule the recurring action. schedule_cancel() won't work at
4503  * this stage, so as a hacky workaround, forcibly change the restart
4504  * digest so pcmk__check_action_config() does what we want later.
4505  *
4506  * @TODO We should skip this if there is a newer successful monitor.
4507  * Also, this causes rescheduling only if the history entry
4508  * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
4509  * scheduler regression test doesn't, but that may not be a
4510  * realistic scenario in production).
4511  */
4512  crm_notice("Rescheduling %s-interval %s of %s on %s "
4513  "after failure expired",
4514  pcmk__readable_interval(history->interval_ms), history->task,
4515  history->rsc->id, pcmk__node_name(history->node));
4516  crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
4517  "calculated-failure-timeout");
4518  return pcmk_rc_ok;
4519  }
4520 
4521  return pcmk_rc_undetermined;
4522 }
4523 
4533 static void
4534 mask_probe_failure(struct action_history *history, int orig_exit_status,
4535  const xmlNode *last_failure,
4536  enum pcmk__on_fail *on_fail)
4537 {
4538  pcmk_resource_t *ban_rsc = history->rsc;
4539 
4540  if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) {
4541  ban_rsc = uber_parent(history->rsc);
4542  }
4543 
4544  crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4545  crm_exit_str(orig_exit_status), history->rsc->id,
4546  pcmk__node_name(history->node));
4547  update_resource_state(history, history->expected_exit_status, last_failure,
4548  on_fail);
4549  crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name);
4550 
4551  record_failed_op(history);
4552  resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
4553  "masked-probe-failure", ban_rsc->priv->scheduler);
4554 }
4555 
4568 static bool
4569 failure_is_newer(const struct action_history *history,
4570  const xmlNode *last_failure)
4571 {
4572  guint failure_interval_ms = 0U;
4573  long long failure_change = 0LL;
4574  long long this_change = 0LL;
4575 
4576  if (last_failure == NULL) {
4577  return false; // Resource has no last_failure entry
4578  }
4579 
4580  if (!pcmk__str_eq(history->task,
4581  crm_element_value(last_failure, PCMK_XA_OPERATION),
4582  pcmk__str_none)) {
4583  return false; // last_failure is for different action
4584  }
4585 
4586  if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
4587  &failure_interval_ms) != pcmk_ok)
4588  || (history->interval_ms != failure_interval_ms)) {
4589  return false; // last_failure is for action with different interval
4590  }
4591 
4592  if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
4593  &this_change, 0LL) != pcmk_rc_ok)
4594  || (pcmk__scan_ll(crm_element_value(last_failure,
4595  PCMK_XA_LAST_RC_CHANGE),
4596  &failure_change, 0LL) != pcmk_rc_ok)
4597  || (failure_change < this_change)) {
4598  return false; // Failure is not known to be newer
4599  }
4600 
4601  return true;
4602 }
4603 
4611 static void
4612 process_pending_action(struct action_history *history,
4613  const xmlNode *last_failure)
4614 {
4615  /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4616  * and there might be a RSC_monitor_INTERVAL entry with the last successful
4617  * or pending result.
4618  *
4619  * If last_failure contains the failure of the pending recurring monitor
4620  * we're processing here, and is newer, the action is no longer pending.
4621  * (Pending results have call ID -1, which sorts last, so the last failure
4622  * if any should be known.)
4623  */
4624  if (failure_is_newer(history, last_failure)) {
4625  return;
4626  }
4627 
4628  if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4629  pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending);
4630  set_active(history->rsc);
4631 
4632  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4633  history->rsc->priv->orig_role = pcmk_role_promoted;
4634 
4635  } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4636  && history->node->details->unclean) {
4637  /* A migrate_to action is pending on an unclean source, so force a stop
4638  * on the target.
4639  */
4640  const char *migrate_target = NULL;
4641  pcmk_node_t *target = NULL;
4642 
4643  migrate_target = crm_element_value(history->xml,
4644  PCMK__META_MIGRATE_TARGET);
4645  target = pcmk_find_node(history->rsc->priv->scheduler,
4646  migrate_target);
4647  if (target != NULL) {
4648  stop_action(history->rsc, target, FALSE);
4649  }
4650  }
4651 
4652  if (history->rsc->priv->pending_action != NULL) {
4653  /* There should never be multiple pending actions, but as a failsafe,
4654  * just remember the first one processed for display purposes.
4655  */
4656  return;
4657  }
4658 
4659  if (pcmk_is_probe(history->task, history->interval_ms)) {
4660  /* Pending probes are currently never displayed, even if pending
4661  * operations are requested. If we ever want to change that,
4662  * enable the below and the corresponding part of
4663  * native.c:native_pending_action().
4664  */
4665 #if 0
4666  history->rsc->private->pending_action = strdup("probe");
4667  history->rsc->private->pending_node = history->node;
4668 #endif
4669  } else {
4670  history->rsc->priv->pending_action = strdup(history->task);
4671  history->rsc->priv->pending_node = history->node;
4672  }
4673 }
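/* Editorial note (not part of unpack.c): history entry IDs follow the
 * RESOURCE_ACTION_INTERVAL pattern produced by pcmk__op_key(), so a recurring
 * monitor of a hypothetical resource "db" appears as "db_monitor_10000",
 * while the resource's most recent failure is stored separately as
 * "db_last_failure_0". failure_is_newer() above compares a pending recurring
 * monitor entry of the first kind against the second.
 */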
4674 
4675 static void
4676 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4677  xmlNode **last_failure, enum pcmk__on_fail *on_fail)
4678 {
4679  int old_rc = 0;
4680  bool expired = false;
4681  pcmk_resource_t *parent = rsc;
4682  enum rsc_role_e fail_role = pcmk_role_unknown;
4683  enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart;
4684 
4685  struct action_history history = {
4686  .rsc = rsc,
4687  .node = node,
4688  .xml = xml_op,
4689  .execution_status = PCMK_EXEC_UNKNOWN,
4690  };
4691 
4692  CRM_CHECK(rsc && node && xml_op, return);
4693 
4694  history.id = pcmk__xe_id(xml_op);
4695  if (history.id == NULL) {
4696  pcmk__config_err("Ignoring resource history entry for %s on %s "
4697  "without ID", rsc->id, pcmk__node_name(node));
4698  return;
4699  }
4700 
4701  // Task and interval
4702  history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
4703  if (history.task == NULL) {
4704  pcmk__config_err("Ignoring resource history entry %s for %s on %s "
4705  "without " PCMK_XA_OPERATION,
4706  history.id, rsc->id, pcmk__node_name(node));
4707  return;
4708  }
4709  crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
4710  if (!can_affect_state(&history)) {
4711  pcmk__rsc_trace(rsc,
4712  "Ignoring resource history entry %s for %s on %s "
4713  "with irrelevant action '%s'",
4714  history.id, rsc->id, pcmk__node_name(node),
4715  history.task);
4716  return;
4717  }
4718 
4719  if (unpack_action_result(&history) != pcmk_rc_ok) {
4720  return; // Error already logged
4721  }
4722 
4723  history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4724  history.key = pcmk__xe_history_key(xml_op);
4725  crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
4726 
4727  pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4728  history.id, history.task, history.call_id,
4729  pcmk__node_name(node),
4730  pcmk_exec_status_str(history.execution_status),
4731  crm_exit_str(history.exit_status));
4732 
4733  if (node->details->unclean) {
4734  pcmk__rsc_trace(rsc,
4735  "%s is running on %s, which is unclean (further action "
4736  "depends on value of stop's on-fail attribute)",
4737  rsc->id, pcmk__node_name(node));
4738  }
4739 
4740  expired = check_operation_expiry(&history);
4741  old_rc = history.exit_status;
4742 
4743  remap_operation(&history, on_fail, expired);
4744 
4745  if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4746  goto done;
4747  }
4748 
4749  if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4750  mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4751  goto done;
4752  }
4753 
4754  if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
4755  parent = uber_parent(rsc);
4756  }
4757 
4758  switch (history.execution_status) {
4759  case PCMK_EXEC_PENDING:
4760  process_pending_action(&history, *last_failure);
4761  goto done;
4762 
4763  case PCMK_EXEC_DONE:
4764  update_resource_state(&history, history.exit_status, *last_failure,
4765  on_fail);
4766  goto done;
4767 
4768  case PCMK_EXEC_NOT_INSTALLED:
4769  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4770  if (failure_strategy == pcmk__on_fail_ignore) {
4771  crm_warn("Cannot ignore failed %s of %s on %s: "
4772  "Resource agent doesn't exist "
4773  QB_XS " status=%d rc=%d id=%s",
4774  history.task, rsc->id, pcmk__node_name(node),
4775  history.execution_status, history.exit_status,
4776  history.id);
4777  /* Also for printing it as "FAILED" by marking it as
4778  * pcmk__rsc_failed later
4779  */
4780  *on_fail = pcmk__on_fail_ban;
4781  }
4782  resource_location(parent, node, -PCMK_SCORE_INFINITY,
4783  "hard-error", rsc->priv->scheduler);
4784  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4785  last_failure, on_fail);
4786  goto done;
4787 
4788  case PCMK_EXEC_NOT_CONNECTED:
4789  if (pcmk__is_pacemaker_remote_node(node)
4790  && pcmk_is_set(node->priv->remote->flags,
4791  pcmk__rsc_managed)) {
4792  /* We should never get into a situation where a managed remote
4793  * connection resource is considered OK but a resource action
4794  * behind the connection gets a "not connected" status. But as a
4795  * fail-safe in case a bug or unusual circumstances do lead to
4796  * that, ensure the remote connection is considered failed.
4797  */
4798  pcmk__set_rsc_flags(node->priv->remote,
4799  pcmk__rsc_failed|pcmk__rsc_stop_if_failed);
4800  }
4801  break; // Not done, do error handling
4802 
4803  case PCMK_EXEC_ERROR:
4804  case PCMK_EXEC_ERROR_HARD:
4805  case PCMK_EXEC_ERROR_FATAL:
4806  case PCMK_EXEC_TIMEOUT:
4807  case PCMK_EXEC_NOT_SUPPORTED:
4808  case PCMK_EXEC_INVALID:
4809  break; // Not done, do error handling
4810 
4811  default: // No other value should be possible at this point
4812  break;
4813  }
4814 
4815  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4816  if ((failure_strategy == pcmk__on_fail_ignore)
4817  || ((failure_strategy == pcmk__on_fail_restart_container)
4818  && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4819 
4820  char *last_change_s = last_change_str(xml_op);
4821 
4822  crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4823  QB_XS " %s",
4824  history.task, crm_exit_str(history.exit_status),
4825  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4826  pcmk__s(history.exit_reason, ""), rsc->id,
4827  pcmk__node_name(node), last_change_s, history.id);
4828  free(last_change_s);
4829 
4830  update_resource_state(&history, history.expected_exit_status,
4831  *last_failure, on_fail);
4832  crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name);
4833  pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure);
4834 
4835  record_failed_op(&history);
4836 
4837  if ((failure_strategy == pcmk__on_fail_restart_container)
4838  && (*on_fail <= pcmk__on_fail_restart)) {
4839  *on_fail = failure_strategy;
4840  }
4841 
4842  } else {
4843  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4844  last_failure, on_fail);
4845 
4846  if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4847  uint8_t log_level = LOG_ERR;
4848 
4849  if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4850  log_level = LOG_NOTICE;
4851  }
4852  do_crm_log(log_level,
4853  "Preventing %s from restarting on %s because "
4854  "of hard failure (%s%s%s) " QB_XS " %s",
4855  parent->id, pcmk__node_name(node),
4856  crm_exit_str(history.exit_status),
4857  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4858  pcmk__s(history.exit_reason, ""), history.id);
4859  resource_location(parent, node, -PCMK_SCORE_INFINITY,
4860  "hard-error", rsc->priv->scheduler);
4861 
4862  } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4863  pcmk__sched_err(rsc->priv->scheduler,
4864  "Preventing %s from restarting anywhere because "
4865  "of fatal failure (%s%s%s) " QB_XS " %s",
4866  parent->id, crm_exit_str(history.exit_status),
4867  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4868  pcmk__s(history.exit_reason, ""), history.id);
4869  resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
4870  "fatal-error", rsc->priv->scheduler);
4871  }
4872  }
4873 
4874 done:
4875  pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4876  rsc->id, pcmk__node_name(node), history.id,
4877  pcmk_role_text(rsc->priv->orig_role),
4878  pcmk_role_text(rsc->priv->next_role));
4879 }
4880 
4890 static gboolean
4891 insert_attr(gpointer key, gpointer value, gpointer user_data)
4892 {
4893  GHashTable *table = user_data;
4894 
4895  g_hash_table_insert(table, key, value);
4896  return TRUE;
4897 }
4898 
4899 static void
4900 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4901  pcmk_scheduler_t *scheduler)
4902 {
4903  const char *cluster_name = NULL;
4904  const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID);
4905 
4906  pe_rule_eval_data_t rule_data = {
4907  .node_hash = NULL,
4908  .now = scheduler->priv->now,
4909  .match_data = NULL,
4910  .rsc_data = NULL,
4911  .op_data = NULL
4912  };
4913 
4914  pcmk__insert_dup(node->priv->attrs,
4915  CRM_ATTR_UNAME, node->priv->name);
4916 
4917  pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id);
4918 
4919  if ((scheduler->dc_node == NULL)
4920  && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) {
4921 
4922  scheduler->dc_node = node;
4923  pcmk__insert_dup(node->priv->attrs,
4924  CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
4925 
4926  } else if (!pcmk__same_node(node, scheduler->dc_node)) {
4927  pcmk__insert_dup(node->priv->attrs,
4928  CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
4929  }
4930 
4931  cluster_name = g_hash_table_lookup(scheduler->priv->options,
4932  PCMK_OPT_CLUSTER_NAME);
4933  if (cluster_name) {
4934  pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME,
4935  cluster_name);
4936  }
4937 
4938  if (overwrite) {
4939  /* @TODO Try to reorder some unpacking so that we don't need the
4940  * overwrite argument or to unpack into a temporary table
4941  */
4942  GHashTable *unpacked = pcmk__strkey_table(free, free);
4943 
4944  pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
4945  &rule_data, unpacked, NULL, scheduler);
4946  g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs);
4947  g_hash_table_destroy(unpacked);
4948 
4949  } else {
4950  pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES,
4951  &rule_data, node->priv->attrs, NULL,
4952  scheduler);
4953  }
4954 
4955  pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
4956  node->priv->utilization, NULL, scheduler);
4957 
4958  if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
4959  pcmk__rsc_node_current) == NULL) {
4960  const char *site_name = pcmk__node_attr(node, "site-name", NULL,
4961  pcmk__rsc_node_current);
4962 
4963  if (site_name) {
4964  pcmk__insert_dup(node->priv->attrs,
4965  CRM_ATTR_SITE_NAME, site_name);
4966 
4967  } else if (cluster_name) {
4968  /* Default to cluster-name if unset */
4969  pcmk__insert_dup(node->priv->attrs,
4970  CRM_ATTR_SITE_NAME, cluster_name);
4971  }
4972  }
4973 }
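/* Editorial note (not part of unpack.c): add_node_attrs() seeds each node's
 * attribute table with built-in attributes (node name and ID, whether the node
 * is the DC, and the cluster or site name when known) and then layers the
 * node's PCMK_XE_INSTANCE_ATTRIBUTES and PCMK_XE_UTILIZATION sets from the CIB
 * on top, so that location rules and utilization checks can match on any of
 * them.
 */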
4974 
4975 static GList *
4976 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
4977 {
4978  int counter = -1;
4979  int stop_index = -1;
4980  int start_index = -1;
4981 
4982  xmlNode *rsc_op = NULL;
4983 
4984  GList *gIter = NULL;
4985  GList *op_list = NULL;
4986  GList *sorted_op_list = NULL;
4987 
4988  /* extract operations */
4989  op_list = NULL;
4990  sorted_op_list = NULL;
4991 
4992  for (rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL,
4993  NULL);
4994  rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) {
4995 
4996  crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
4997  crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
4998  op_list = g_list_prepend(op_list, rsc_op);
4999  }
5000 
5001  if (op_list == NULL) {
5002  /* if there are no operations, there is nothing to do */
5003  return NULL;
5004  }
5005 
5006  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
5007 
5008  /* create active recurring operations as optional */
5009  if (active_filter == FALSE) {
5010  return sorted_op_list;
5011  }
5012 
5013  op_list = NULL;
5014 
5015  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
5016 
5017  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
5018  xmlNode *rsc_op = (xmlNode *) gIter->data;
5019 
5020  counter++;
5021 
5022  if (start_index < stop_index) {
5023  crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
5024  break;
5025 
5026  } else if (counter < start_index) {
5027  crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
5028  continue;
5029  }
5030  op_list = g_list_append(op_list, rsc_op);
5031  }
5032 
5033  g_list_free(sorted_op_list);
5034  return op_list;
5035 }
5036 
5037 GList *
5038 find_operations(const char *rsc, const char *node, gboolean active_filter,
5039  pcmk_scheduler_t *scheduler)
5040 {
5041  GList *output = NULL;
5042  GList *intermediate = NULL;
5043 
5044  xmlNode *tmp = NULL;
5045  xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
5046  NULL, NULL);
5047 
5048  pcmk_node_t *this_node = NULL;
5049 
5050  xmlNode *node_state = NULL;
5051 
5052  CRM_CHECK(status != NULL, return NULL);
5053 
5054  for (node_state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL,
5055  NULL);
5056  node_state != NULL;
5057  node_state = pcmk__xe_next(node_state, PCMK__XE_NODE_STATE)) {
5058 
5059  const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
5060 
5061  if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5062  continue;
5063  }
5064 
5065  this_node = pcmk_find_node(scheduler, uname);
5066  if(this_node == NULL) {
5067  CRM_LOG_ASSERT(this_node != NULL);
5068  continue;
5069 
5070  } else if (pcmk__is_pacemaker_remote_node(this_node)) {
5071  determine_remote_online_status(scheduler, this_node);
5072 
5073  } else {
5074  determine_online_status(node_state, this_node, scheduler);
5075  }
5076 
5077  if (this_node->details->online
5078  || pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
5079  /* offline nodes run no resources...
5080  * unless stonith is enabled in which case we need to
5081  * make sure rsc start events happen after the stonith
5082  */
5083  xmlNode *lrm_rsc = NULL;
5084 
5085  tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
5086  NULL);
5087  tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
5088  NULL);
5089 
5090  for (lrm_rsc = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCE,
5091  NULL, NULL);
5092  lrm_rsc != NULL;
5093  lrm_rsc = pcmk__xe_next(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
5094 
5095  const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID);
5096 
5097  if ((rsc != NULL)
5098  && !pcmk__str_eq(rsc_id, rsc, pcmk__str_none)) {
5099  continue;
5100  }
5101 
5102  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5103  output = g_list_concat(output, intermediate);
5104  }
5105  }
5106  }
5107 
5108  return output;
5109 }