pacemaker  2.1.7-0f7f88312f
Scalable High-Availability cluster resource manager
unpack.c
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 #include <time.h>
16 
17 #include <crm/crm.h>
18 #include <crm/services.h>
19 #include <crm/msg_xml.h>
20 #include <crm/common/xml.h>
22 
23 #include <crm/common/util.h>
24 #include <crm/pengine/rules.h>
25 #include <crm/pengine/internal.h>
26 #include <pe_status_private.h>
27 
28 CRM_TRACE_INIT_DATA(pe_status);
29 
30 // A (parsed) resource action history entry
31 struct action_history {
32  pcmk_resource_t *rsc; // Resource that history is for
33  pcmk_node_t *node; // Node that history is for
34  xmlNode *xml; // History entry XML
35 
36  // Parsed from entry XML
37  const char *id; // XML ID of history entry
38  const char *key; // Operation key of action
39  const char *task; // Action name
40  const char *exit_reason; // Exit reason given for result
41  guint interval_ms; // Action interval
42  int call_id; // Call ID of action
43  int expected_exit_status; // Expected exit status of action
44  int exit_status; // Actual exit status of action
45  int execution_status; // Execution status of action
46 };
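/* For illustration only: a history entry is parsed from an <lrm_rsc_op>
 * element in the CIB status section along these lines (attribute names shown
 * are the usual CIB ones and may not be exhaustive):
 *
 *   <lrm_rsc_op id="rsc1_last_0" operation_key="rsc1_start_0"
 *               operation="start" call-id="5" rc-code="0" op-status="0"
 *               interval="0" exit-reason=""/>
 *
 * which populates id, key, task, call_id, exit_status, execution_status,
 * interval_ms and exit_reason above; rsc, node and expected_exit_status are
 * filled in from the surrounding unpacking context.
 */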
47 
48 /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
49  * use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the
50  * flag is stringified more readably in log messages.
51  */
52 #define set_config_flag(scheduler, option, flag) do { \
53  const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \
54  if (scf_value != NULL) { \
55  if (crm_is_true(scf_value)) { \
56  (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
57  LOG_TRACE, "Scheduler", \
58  crm_system_name, (scheduler)->flags, \
59  (flag), #flag); \
60  } else { \
61  (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
62  LOG_TRACE, "Scheduler", \
63  crm_system_name, (scheduler)->flags, \
64  (flag), #flag); \
65  } \
66  } \
67  } while(0)
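/* Typical use, as seen later in unpack_config(): read a boolean cluster
 * option and mirror it into scheduler->flags, for example
 *
 *   set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all);
 *
 * Note that a NULL value from pe_pref() leaves the flag untouched.
 */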
68 
69 static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
70  xmlNode *xml_op, xmlNode **last_failure,
71  enum action_fail_response *failed);
72 static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
73  pcmk_node_t *this_node);
74 static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
75  bool overwrite, pcmk_scheduler_t *scheduler);
76 static void determine_online_status(const xmlNode *node_state,
77  pcmk_node_t *this_node,
78  pcmk_scheduler_t *scheduler);
79 
80 static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
81  pcmk_scheduler_t *scheduler);
82 
83 
84 static gboolean
85 is_dangling_guest_node(pcmk_node_t *node)
86 {
87  /* we are looking for a remote-node that was supposed to be mapped to a
88  * container resource, but all traces of that container have disappeared
89  * from both the config and the status section. */
90  if (pe__is_guest_or_remote_node(node) &&
91  node->details->remote_rsc &&
92  node->details->remote_rsc->container == NULL &&
93  pcmk_is_set(node->details->remote_rsc->flags,
94  pcmk_rsc_removed_filler)) {
95  return TRUE;
96  }
97 
98  return FALSE;
99 }
100 
109 void
110 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
111  const char *reason, bool priority_delay)
112 {
113  CRM_CHECK(node, return);
114 
115  /* A guest node is fenced by marking its container as failed */
116  if (pe__is_guest_node(node)) {
117  pcmk_resource_t *rsc = node->details->remote_rsc->container;
118 
119  if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
120  if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
121  crm_notice("Not fencing guest node %s "
122  "(otherwise would because %s): "
123  "its guest resource %s is unmanaged",
124  pe__node_name(node), reason, rsc->id);
125  } else {
126  crm_warn("Guest node %s will be fenced "
127  "(by recovering its guest resource %s): %s",
128  pe__node_name(node), rsc->id, reason);
129 
130  /* We don't mark the node as unclean because that would prevent the
131  * node from running resources. We want to allow it to run resources
132  * in this transition if the recovery succeeds.
133  */
134  node->details->remote_requires_reset = TRUE;
137  }
138  }
139 
140  } else if (is_dangling_guest_node(node)) {
141  crm_info("Cleaning up dangling connection for guest node %s: "
142  "fencing was already done because %s, "
143  "and guest resource no longer exists",
144  pe__node_name(node), reason);
147 
148  } else if (pe__is_remote_node(node)) {
149  pcmk_resource_t *rsc = node->details->remote_rsc;
150 
151  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
152  crm_notice("Not fencing remote node %s "
153  "(otherwise would because %s): connection is unmanaged",
154  pe__node_name(node), reason);
155  } else if(node->details->remote_requires_reset == FALSE) {
156  node->details->remote_requires_reset = TRUE;
157  crm_warn("Remote node %s %s: %s",
158  pe__node_name(node),
159  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
160  reason);
161  }
162  node->details->unclean = TRUE;
163  // No need to apply `priority-fencing-delay` for remote nodes
164  pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
165 
166  } else if (node->details->unclean) {
167  crm_trace("Cluster node %s %s because %s",
168  pe__node_name(node),
169  pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
170  reason);
171 
172  } else {
173  crm_warn("Cluster node %s %s: %s",
174  pe__node_name(node),
175  pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
176  reason);
177  node->details->unclean = TRUE;
178  pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
179  }
180 }
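/* Callers pass the scheduler data, the affected node, a human-readable reason,
 * and whether priority-fencing-delay should be considered, e.g. (taken from
 * the quorum check later in this file):
 *
 *   pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE);
 */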
181 
182 // @TODO xpaths can't handle templates, rules, or id-refs
183 
184 // nvpair with provides or requires set to unfencing
185 #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
186  "[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \
187  "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
188  "and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']"
189 
190 // unfencing in rsc_defaults or any resource
191 #define XPATH_ENABLE_UNFENCING \
192  "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
193  "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
194  "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
195  "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
196 
197 static void
198 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
199 {
200  xmlXPathObjectPtr result = NULL;
201 
202  if (!pcmk_is_set(scheduler->flags, flag)) {
203  result = xpath_search(scheduler->input, xpath);
204  if (result && (numXpathResults(result) > 0)) {
205  pe__set_working_set_flags(scheduler, flag);
206  }
207  freeXpathObject(result);
208  }
209 }
210 
211 gboolean
212 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
213 {
214  const char *value = NULL;
215  GHashTable *config_hash = pcmk__strkey_table(free, free);
216 
217  pe_rule_eval_data_t rule_data = {
218  .node_hash = NULL,
219  .role = pcmk_role_unknown,
220  .now = scheduler->now,
221  .match_data = NULL,
222  .rsc_data = NULL,
223  .op_data = NULL
224  };
225 
226  scheduler->config_hash = config_hash;
227 
228  pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash,
229  CIB_OPTIONS_FIRST, FALSE, scheduler);
230 
232 
233  set_config_flag(scheduler, "enable-startup-probes",
236  crm_info("Startup probes: disabled (dangerous)");
237  }
238 
240  if (value && crm_is_true(value)) {
241  crm_info("Watchdog-based self-fencing will be performed via SBD if "
242  "fencing is required and stonith-watchdog-timeout is nonzero");
244  }
245 
246  /* Set certain flags via xpath here, so they can be used before the relevant
247  * configuration sections are unpacked.
248  */
250  scheduler);
251 
252  value = pe_pref(scheduler->config_hash, "stonith-timeout");
254  crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
255 
258  crm_debug("STONITH of failed nodes is enabled");
259  } else {
260  crm_debug("STONITH of failed nodes is disabled");
261  }
262 
264  "stonith-action");
265  if (!strcmp(scheduler->stonith_action, "poweroff")) {
267  "Support for stonith-action of 'poweroff' is deprecated "
268  "and will be removed in a future release (use 'off' instead)");
270  }
271  crm_trace("STONITH will %s nodes", scheduler->stonith_action);
272 
273  set_config_flag(scheduler, "concurrent-fencing",
276  crm_debug("Concurrent fencing is enabled");
277  } else {
278  crm_debug("Concurrent fencing is disabled");
279  }
280 
281  value = pe_pref(scheduler->config_hash,
283  if (value) {
285  / 1000;
286  crm_trace("Priority fencing delay is %ds",
288  }
289 
290  set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all);
291  crm_debug("Stop all active resources: %s",
293 
294  set_config_flag(scheduler, "symmetric-cluster",
297  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
298  }
299 
300  value = pe_pref(scheduler->config_hash, "no-quorum-policy");
301 
302  if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
304 
305  } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) {
307 
308  } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
310 
311  } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) {
313  int do_panic = 0;
314 
316  &do_panic);
317  if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
319  } else {
320  crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
322  }
323  } else {
324  pcmk__config_err("Resetting no-quorum-policy to 'stop' because "
325  "fencing is disabled");
327  }
328 
329  } else {
331  }
332 
333  switch (scheduler->no_quorum_policy) {
335  crm_debug("On loss of quorum: Freeze resources");
336  break;
337  case pcmk_no_quorum_stop:
338  crm_debug("On loss of quorum: Stop ALL resources");
339  break;
341  crm_debug("On loss of quorum: "
342  "Demote promotable resources and stop other resources");
343  break;
345  crm_notice("On loss of quorum: Fence all remaining nodes");
346  break;
348  crm_notice("On loss of quorum: Ignore");
349  break;
350  }
351 
352  set_config_flag(scheduler, "stop-orphan-resources",
355  crm_trace("Orphan resources are stopped");
356  } else {
357  crm_trace("Orphan resources are ignored");
358  }
359 
360  set_config_flag(scheduler, "stop-orphan-actions",
363  crm_trace("Orphan resource actions are stopped");
364  } else {
365  crm_trace("Orphan resource actions are ignored");
366  }
367 
368  value = pe_pref(scheduler->config_hash, "remove-after-stop");
369  if (value != NULL) {
370  if (crm_is_true(value)) {
372 #ifndef PCMK__COMPAT_2_0
374  "Support for the remove-after-stop cluster property is"
375  " deprecated and will be removed in a future release");
376 #endif
377  } else {
380  }
381  }
382 
384  crm_trace("Maintenance mode: %s",
385  pcmk__btoa(pcmk_is_set(scheduler->flags,
387 
388  set_config_flag(scheduler, "start-failure-is-fatal",
391  crm_trace("Start failures are always fatal");
392  } else {
393  crm_trace("Start failures are handled by failcount");
394  }
395 
397  set_config_flag(scheduler, "startup-fencing",
399  }
401  crm_trace("Unseen nodes will be fenced");
402  } else {
403  pe_warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes");
404  }
405 
407 
409  "placement-strategy");
410  crm_trace("Placement strategy: %s", scheduler->placement_strategy);
411 
414  value = pe_pref(scheduler->config_hash,
417  crm_trace("Resources will be locked to nodes that were cleanly "
418  "shut down (locks expire after %s)",
420  } else {
421  crm_trace("Resources will not be locked to nodes that were cleanly "
422  "shut down");
423  }
424 
425  value = pe_pref(scheduler->config_hash,
428  if (scheduler->node_pending_timeout == 0) {
429  crm_trace("Do not fence pending nodes");
430  } else {
431  crm_trace("Fence pending nodes after %s",
433  * 1000));
434  }
435 
436  return TRUE;
437 }
438 
439 pcmk_node_t *
440 pe_create_node(const char *id, const char *uname, const char *type,
441  const char *score, pcmk_scheduler_t *scheduler)
442 {
443  pcmk_node_t *new_node = NULL;
444 
445  if (pe_find_node(scheduler->nodes, uname) != NULL) {
446  pcmk__config_warn("More than one node entry has name '%s'", uname);
447  }
448 
449  new_node = calloc(1, sizeof(pcmk_node_t));
450  if (new_node == NULL) {
451  return NULL;
452  }
453 
454  new_node->weight = char2score(score);
455  new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
456 
457  if (new_node->details == NULL) {
458  free(new_node);
459  return NULL;
460  }
461 
462  crm_trace("Creating node for entry %s/%s", uname, id);
463  new_node->details->id = id;
464  new_node->details->uname = uname;
465  new_node->details->online = FALSE;
466  new_node->details->shutdown = FALSE;
467  new_node->details->rsc_discovery_enabled = TRUE;
468  new_node->details->running_rsc = NULL;
469  new_node->details->data_set = scheduler;
470 
471  if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) {
473 
474  } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) {
477 
478  } else {
479  /* @COMPAT 'ping' is the default for backward compatibility, but it
480  * should be changed to 'member' at a compatibility break
481  */
482  if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) {
483  pcmk__config_warn("Node %s has unrecognized type '%s', "
484  "assuming 'ping'", pcmk__s(uname, "without name"),
485  type);
486  }
488  "Support for nodes of type 'ping' (such as %s) is "
489  "deprecated and will be removed in a future release",
490  pcmk__s(uname, "unnamed node"));
491  new_node->details->type = node_ping;
492  }
493 
494  new_node->details->attrs = pcmk__strkey_table(free, free);
495 
496  if (pe__is_guest_or_remote_node(new_node)) {
497  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
498  strdup("remote"));
499  } else {
500  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
501  strdup("cluster"));
502  }
503 
504  new_node->details->utilization = pcmk__strkey_table(free, free);
505  new_node->details->digest_cache = pcmk__strkey_table(free,
507 
508  scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
510  return new_node;
511 }
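/* For reference, the <node> entries that unpack_nodes() feeds into this
 * function look roughly like:
 *
 *   <node id="101" uname="cluster-node-1"/>                (cluster member)
 *   <node id="remote1" uname="remote1" type="remote"/>     (Pacemaker Remote)
 *
 * The deprecated type="ping" is still accepted, as handled above.
 */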
512 
513 static const char *
514 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
515 {
516  xmlNode *attr_set = NULL;
517  xmlNode *attr = NULL;
518 
519  const char *container_id = ID(xml_obj);
520  const char *remote_name = NULL;
521  const char *remote_server = NULL;
522  const char *remote_port = NULL;
523  const char *connect_timeout = "60s";
524  const char *remote_allow_migrate=NULL;
525  const char *is_managed = NULL;
526 
527  for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
528  attr_set = pcmk__xe_next(attr_set)) {
529 
530  if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS,
531  pcmk__str_casei)) {
532  continue;
533  }
534 
535  for (attr = pcmk__xe_first_child(attr_set); attr != NULL;
536  attr = pcmk__xe_next(attr)) {
537  const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
538  const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
539 
540  if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) {
541  remote_name = value;
542  } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) {
543  remote_server = value;
544  } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) {
545  remote_port = value;
546  } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) {
547  connect_timeout = value;
548  } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) {
549  remote_allow_migrate=value;
550  } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) {
551  is_managed = value;
552  }
553  }
554  }
555 
556  if (remote_name == NULL) {
557  return NULL;
558  }
559 
560  if (pe_find_resource(data->resources, remote_name) != NULL) {
561  return NULL;
562  }
563 
564  pe_create_remote_xml(parent, remote_name, container_id,
565  remote_allow_migrate, is_managed,
566  connect_timeout, remote_server, remote_port);
567  return remote_name;
568 }
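/* As a sketch, the guest-node meta-attributes this function looks for appear
 * in the resource configuration roughly like:
 *
 *   <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *     <meta_attributes id="vm1-meta">
 *       <nvpair id="vm1-remote-node" name="remote-node" value="guest1"/>
 *       <nvpair id="vm1-remote-addr" name="remote-addr" value="192.168.122.10"/>
 *     </meta_attributes>
 *   </primitive>
 *
 * which causes an ocf:pacemaker:remote primitive named "guest1" to be added to
 * the configuration via pe_create_remote_xml() and unpacked later.
 */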
569 
570 static void
571 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
572 {
573  if ((new_node->details->type == pcmk_node_variant_remote)
574  && (new_node->details->remote_rsc == NULL)) {
575  /* Ignore fencing for remote nodes that don't have a connection resource
576  * associated with them. This happens when remote node entries get left
577  * in the nodes section after the connection resource is removed.
578  */
579  return;
580  }
581 
582  if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
583  // All nodes are unclean until we've seen their status entry
584  new_node->details->unclean = TRUE;
585 
586  } else {
587  // Blind faith ...
588  new_node->details->unclean = FALSE;
589  }
590 
591  /* We need to be able to determine if a node's status section
592  * exists, separately from whether the node is unclean. */
593  new_node->details->unseen = TRUE;
594 }
595 
596 gboolean
597 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
598 {
599  xmlNode *xml_obj = NULL;
600  pcmk_node_t *new_node = NULL;
601  const char *id = NULL;
602  const char *uname = NULL;
603  const char *type = NULL;
604  const char *score = NULL;
605 
606  for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL;
607  xml_obj = pcmk__xe_next(xml_obj)) {
608 
609  if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) {
610  new_node = NULL;
611 
612  id = crm_element_value(xml_obj, XML_ATTR_ID);
613  uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
614  type = crm_element_value(xml_obj, XML_ATTR_TYPE);
615  score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
616  crm_trace("Processing node %s/%s", uname, id);
617 
618  if (id == NULL) {
619  pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
620  "> entry in configuration without id");
621  continue;
622  }
623  new_node = pe_create_node(id, uname, type, score, scheduler);
624 
625  if (new_node == NULL) {
626  return FALSE;
627  }
628 
629  handle_startup_fencing(scheduler, new_node);
630 
631  add_node_attrs(xml_obj, new_node, FALSE, scheduler);
632 
633  crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
634  }
635  }
636 
637  if (scheduler->localhost
638  && (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) {
639  crm_info("Creating a fake local node");
641  scheduler);
642  }
643 
644  return TRUE;
645 }
646 
647 static void
648 setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
649 {
650  const char *container_id = NULL;
651 
652  if (rsc->children) {
653  g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
654  return;
655  }
656 
657  container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
658  if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
659  pcmk_resource_t *container = pe_find_resource(scheduler->resources,
660  container_id);
661 
662  if (container) {
663  rsc->container = container;
665  container->fillers = g_list_append(container->fillers, rsc);
666  pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
667  } else {
668  pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
669  }
670  }
671 }
672 
673 gboolean
674 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
675 {
676  xmlNode *xml_obj = NULL;
677 
678  /* Create remote nodes and guest nodes from the resource configuration
679  * before unpacking resources.
680  */
681  for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
682  xml_obj = pcmk__xe_next(xml_obj)) {
683 
684  const char *new_node_id = NULL;
685 
686  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
687  * primitives.
688  */
689  if (xml_contains_remote_node(xml_obj)) {
690  new_node_id = ID(xml_obj);
691  /* The "pe_find_node" check is here to make sure we don't iterate over
692  * an expanded node that has already been added to the node list. */
693  if (new_node_id
694  && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
695  crm_trace("Found remote node %s defined by resource %s",
696  new_node_id, ID(xml_obj));
697  pe_create_node(new_node_id, new_node_id, "remote", NULL,
698  scheduler);
699  }
700  continue;
701  }
702 
703  /* Check for guest nodes, which are defined by special meta-attributes
704  * of a primitive of any type (for example, VirtualDomain or Xen).
705  */
706  if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
707  /* This will add an ocf:pacemaker:remote primitive to the
708  * configuration for the guest node's connection, to be unpacked
709  * later.
710  */
711  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
712  scheduler);
713  if (new_node_id
714  && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
715  crm_trace("Found guest node %s in resource %s",
716  new_node_id, ID(xml_obj));
717  pe_create_node(new_node_id, new_node_id, "remote", NULL,
718  scheduler);
719  }
720  continue;
721  }
722 
723  /* Check for guest nodes inside a group. Clones are currently not
724  * supported as guest nodes.
725  */
726  if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) {
727  xmlNode *xml_obj2 = NULL;
728  for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL;
729  xml_obj2 = pcmk__xe_next(xml_obj2)) {
730 
731  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
732  scheduler);
733 
734  if (new_node_id
735  && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
736  crm_trace("Found guest node %s in resource %s inside group %s",
737  new_node_id, ID(xml_obj2), ID(xml_obj));
738  pe_create_node(new_node_id, new_node_id, "remote", NULL,
739  scheduler);
740  }
741  }
742  }
743  }
744  return TRUE;
745 }
746 
747 /* Call this after all the nodes and resources have been
748  * unpacked, but before the status section is read.
749  *
750  * A remote node's online status is reflected by the state
751  * of the remote node's connection resource. We need to link
752  * the remote node to this connection resource so we can have
753  * easy access to the connection resource during the scheduler calculations.
754  */
755 static void
756 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
757 {
758  pcmk_node_t *remote_node = NULL;
759 
760  if (new_rsc->is_remote_node == FALSE) {
761  return;
762  }
763 
765  /* remote_nodes and remote_resources are not linked in quick location calculations */
766  return;
767  }
768 
769  remote_node = pe_find_node(scheduler->nodes, new_rsc->id);
770  CRM_CHECK(remote_node != NULL, return);
771 
772  pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
773  new_rsc->id, pe__node_name(remote_node));
774  remote_node->details->remote_rsc = new_rsc;
775 
776  if (new_rsc->container == NULL) {
777  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
778  * the same as is done for cluster nodes.
779  */
780  handle_startup_fencing(scheduler, remote_node);
781 
782  } else {
783  /* pe_create_node() marks the new node as "remote" or "cluster"; now
784  * that we know the node is a guest node, update it correctly.
785  */
786  g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
787  strdup("container"));
788  }
789 }
790 
791 static void
792 destroy_tag(gpointer data)
793 {
794  pcmk_tag_t *tag = data;
795 
796  if (tag) {
797  free(tag->id);
798  g_list_free_full(tag->refs, free);
799  free(tag);
800  }
801 }
802 
815 gboolean
816 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
817 {
818  xmlNode *xml_obj = NULL;
819  GList *gIter = NULL;
820 
821  scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
822 
823  for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
824  xml_obj = pcmk__xe_next(xml_obj)) {
825 
826  pcmk_resource_t *new_rsc = NULL;
827  const char *id = ID(xml_obj);
828 
829  if (pcmk__str_empty(id)) {
830  pcmk__config_err("Ignoring <%s> resource without ID",
831  xml_obj->name);
832  continue;
833  }
834 
835  if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE,
836  pcmk__str_none)) {
837  if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
838  NULL, NULL) == FALSE) {
839  /* Record the template's ID for the knowledge of its existence anyway. */
840  g_hash_table_insert(scheduler->template_rsc_sets, strdup(id),
841  NULL);
842  }
843  continue;
844  }
845 
846  crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>",
847  xml_obj->name, id);
848  if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
849  scheduler) == pcmk_rc_ok) {
850  scheduler->resources = g_list_append(scheduler->resources, new_rsc);
851  pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
852 
853  } else {
854  pcmk__config_err("Ignoring <%s> resource '%s' "
855  "because configuration is invalid",
856  xml_obj->name, id);
857  }
858  }
859 
860  for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
861  pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
862 
863  setup_container(rsc, scheduler);
864  link_rsc2remotenode(scheduler, rsc);
865  }
866 
867  scheduler->resources = g_list_sort(scheduler->resources,
870  /* Ignore */
871 
874 
875  pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
876  pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option");
877  pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
878  }
879 
880  return TRUE;
881 }
882 
883 gboolean
884 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
885 {
886  xmlNode *xml_tag = NULL;
887 
888  scheduler->tags = pcmk__strkey_table(free, destroy_tag);
889 
890  for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL;
891  xml_tag = pcmk__xe_next(xml_tag)) {
892 
893  xmlNode *xml_obj_ref = NULL;
894  const char *tag_id = ID(xml_tag);
895 
896  if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) {
897  continue;
898  }
899 
900  if (tag_id == NULL) {
901  pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
902  (const char *) xml_tag->name);
903  continue;
904  }
905 
906  for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL;
907  xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
908 
909  const char *obj_ref = ID(xml_obj_ref);
910 
911  if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) {
912  continue;
913  }
914 
915  if (obj_ref == NULL) {
916  pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
917  xml_obj_ref->name, tag_id);
918  continue;
919  }
920 
921  if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
922  return FALSE;
923  }
924  }
925  }
926 
927  return TRUE;
928 }
929 
930 /* The ticket state section:
931  * "/cib/status/tickets/ticket_state" */
932 static gboolean
933 unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
934 {
935  const char *ticket_id = NULL;
936  const char *granted = NULL;
937  const char *last_granted = NULL;
938  const char *standby = NULL;
939  xmlAttrPtr xIter = NULL;
940 
941  pcmk_ticket_t *ticket = NULL;
942 
943  ticket_id = ID(xml_ticket);
944  if (pcmk__str_empty(ticket_id)) {
945  return FALSE;
946  }
947 
948  crm_trace("Processing ticket state for %s", ticket_id);
949 
950  ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
951  if (ticket == NULL) {
952  ticket = ticket_new(ticket_id, scheduler);
953  if (ticket == NULL) {
954  return FALSE;
955  }
956  }
957 
958  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
959  const char *prop_name = (const char *)xIter->name;
960  const char *prop_value = pcmk__xml_attr_value(xIter);
961 
962  if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) {
963  continue;
964  }
965  g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
966  }
967 
968  granted = g_hash_table_lookup(ticket->state, "granted");
969  if (granted && crm_is_true(granted)) {
970  ticket->granted = TRUE;
971  crm_info("We have ticket '%s'", ticket->id);
972  } else {
973  ticket->granted = FALSE;
974  crm_info("We do not have ticket '%s'", ticket->id);
975  }
976 
977  last_granted = g_hash_table_lookup(ticket->state, "last-granted");
978  if (last_granted) {
979  long long last_granted_ll;
980 
981  pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
982  ticket->last_granted = (time_t) last_granted_ll;
983  }
984 
985  standby = g_hash_table_lookup(ticket->state, "standby");
986  if (standby && crm_is_true(standby)) {
987  ticket->standby = TRUE;
988  if (ticket->granted) {
989  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
990  }
991  } else {
992  ticket->standby = FALSE;
993  }
994 
995  crm_trace("Done with ticket state for %s", ticket_id);
996 
997  return TRUE;
998 }
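/* The ticket_state entries handled above look roughly like:
 *
 *   <ticket_state id="ticketA" granted="true" last-granted="1680000000"
 *                 standby="false"/>
 *
 * Any additional attributes are simply copied into ticket->state.
 */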
999 
1000 static gboolean
1001 unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1002 {
1003  xmlNode *xml_obj = NULL;
1004 
1005  for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL;
1006  xml_obj = pcmk__xe_next(xml_obj)) {
1007 
1008  if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) {
1009  continue;
1010  }
1011  unpack_ticket_state(xml_obj, scheduler);
1012  }
1013 
1014  return TRUE;
1015 }
1016 
1017 static void
1018 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1019  pcmk_scheduler_t *scheduler)
1020 {
1021  const char *resource_discovery_enabled = NULL;
1022  const xmlNode *attrs = NULL;
1023  pcmk_resource_t *rsc = NULL;
1024 
1025  if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
1026  return;
1027  }
1028 
1029  if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
1030  return;
1031  }
1032  crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node));
1033 
1035  &(this_node->details->remote_maintenance), 0);
1036 
1037  rsc = this_node->details->remote_rsc;
1038  if (this_node->details->remote_requires_reset == FALSE) {
1039  this_node->details->unclean = FALSE;
1040  this_node->details->unseen = FALSE;
1041  }
1042  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1043  add_node_attrs(attrs, this_node, TRUE, scheduler);
1044 
1045  if (pe__shutdown_requested(this_node)) {
1046  crm_info("%s is shutting down", pe__node_name(this_node));
1047  this_node->details->shutdown = TRUE;
1048  }
1049 
1050  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1051  crm_info("%s is in standby mode", pe__node_name(this_node));
1052  this_node->details->standby = TRUE;
1053  }
1054 
1055  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
1056  ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1057  crm_info("%s is in maintenance mode", pe__node_name(this_node));
1058  this_node->details->maintenance = TRUE;
1059  }
1060 
1061  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1062  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1063  if (pe__is_remote_node(this_node)
1066  " attribute on Pacemaker Remote node %s"
1067  " because fencing is disabled",
1068  pe__node_name(this_node));
1069  } else {
1070  /* This is either a remote node with fencing enabled, or a guest
1071  * node. We don't care whether fencing is enabled when fencing guest
1072  * nodes, because they are "fenced" by recovering their containing
1073  * resource.
1074  */
1075  crm_info("%s has resource discovery disabled",
1076  pe__node_name(this_node));
1077  this_node->details->rsc_discovery_enabled = FALSE;
1078  }
1079  }
1080 }
1081 
1090 static void
1091 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1092  pcmk_scheduler_t *scheduler)
1093 {
1094  const char *discovery = NULL;
1095  const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS,
1096  FALSE);
1097 
1098  add_node_attrs(attrs, node, TRUE, scheduler);
1099 
1100  if (crm_is_true(pe_node_attribute_raw(node, "standby"))) {
1101  crm_info("%s is in standby mode", pe__node_name(node));
1102  node->details->standby = TRUE;
1103  }
1104 
1105  if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) {
1106  crm_info("%s is in maintenance mode", pe__node_name(node));
1107  node->details->maintenance = TRUE;
1108  }
1109 
1111  if ((discovery != NULL) && !crm_is_true(discovery)) {
1113  " attribute for %s because disabling resource discovery "
1114  "is not allowed for cluster nodes", pe__node_name(node));
1115  }
1116 }
1117 
1130 static void
1131 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1132 {
1133  const char *id = NULL;
1134  const char *uname = NULL;
1135  pcmk_node_t *this_node = NULL;
1136 
1137  id = crm_element_value(state, XML_ATTR_ID);
1138  if (id == NULL) {
1139  crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without "
1140  XML_ATTR_ID);
1141  return;
1142  }
1143 
1144  uname = crm_element_value(state, XML_ATTR_UNAME);
1145  if (uname == NULL) {
1146  /* If a joining peer causes the cluster to acquire quorum from Corosync
1147  * before it has joined the CPG membership of pacemaker-controld, it is
1148  * possible that the node_state entry created for it does not have an
1149  * uname yet. Recognize the node as `pending` and wait for it to
1150  * join CPG.
1151  */
1152  crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without "
1153  XML_ATTR_UNAME, id);
1154  }
1155 
1156  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1157  if (this_node == NULL) {
1158  pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) "
1159  "because it is no longer in the configuration",
1160  id, pcmk__s(uname, "uname unknown"));
1161  return;
1162  }
1163 
1164  if (pe__is_guest_or_remote_node(this_node)) {
1165  /* We can't determine the online status of Pacemaker Remote nodes until
1166  * after all resource history has been unpacked. In this first pass, we
1167  * do need to mark whether the node has been fenced, as this plays a
1168  * role during unpacking cluster node resource state.
1169  */
1171  &(this_node->details->remote_was_fenced), 0);
1172  return;
1173  }
1174 
1175  unpack_transient_attributes(state, this_node, scheduler);
1176 
1177  /* Provisionally mark this cluster node as clean. We have at least seen it
1178  * in the current cluster's lifetime.
1179  */
1180  this_node->details->unclean = FALSE;
1181  this_node->details->unseen = FALSE;
1182 
1183  crm_trace("Determining online status of cluster node %s (id %s)",
1184  pe__node_name(this_node), id);
1185  determine_online_status(state, this_node, scheduler);
1186 
1188  && this_node->details->online
1190  /* Everything else should flow from this automatically
1191  * (at least until the scheduler becomes able to migrate off
1192  * healthy resources)
1193  */
1194  pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1195  FALSE);
1196  }
1197 }
1198 
1216 static int
1217 unpack_node_history(const xmlNode *status, bool fence,
1218  pcmk_scheduler_t *scheduler)
1219 {
1220  int rc = pcmk_rc_ok;
1221 
1222  // Loop through all node_state entries in CIB status
1223  for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE);
1224  state != NULL; state = crm_next_same_xml(state)) {
1225 
1226  const char *id = ID(state);
1227  const char *uname = crm_element_value(state, XML_ATTR_UNAME);
1228  pcmk_node_t *this_node = NULL;
1229 
1230  if ((id == NULL) || (uname == NULL)) {
1231  // Warning already logged in first pass through status section
1232  crm_trace("Not unpacking resource history from malformed "
1233  XML_CIB_TAG_STATE " without id and/or uname");
1234  continue;
1235  }
1236 
1237  this_node = pe_find_node_any(scheduler->nodes, id, uname);
1238  if (this_node == NULL) {
1239  // Warning already logged in first pass through status section
1240  crm_trace("Not unpacking resource history for node %s because "
1241  "no longer in configuration", id);
1242  continue;
1243  }
1244 
1245  if (this_node->details->unpacked) {
1246  crm_trace("Not unpacking resource history for node %s because "
1247  "already unpacked", id);
1248  continue;
1249  }
1250 
1251  if (fence) {
1252  // We're processing all remaining nodes
1253 
1254  } else if (pe__is_guest_node(this_node)) {
1255  /* We can unpack a guest node's history only after we've unpacked
1256  * other resource history to the point that we know that the node's
1257  * connection and containing resource are both up.
1258  */
1259  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1260 
1261  if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1262  || (rsc->container->role != pcmk_role_started)) {
1263  crm_trace("Not unpacking resource history for guest node %s "
1264  "because container and connection are not known to "
1265  "be up", id);
1266  continue;
1267  }
1268 
1269  } else if (pe__is_remote_node(this_node)) {
1270  /* We can unpack a remote node's history only after we've unpacked
1271  * other resource history to the point that we know that the node's
1272  * connection is up, with the exception of when shutdown locks are
1273  * in use.
1274  */
1275  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1276 
1277  if ((rsc == NULL)
1278  || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
1279  && (rsc->role != pcmk_role_started))) {
1280  crm_trace("Not unpacking resource history for remote node %s "
1281  "because connection is not known to be up", id);
1282  continue;
1283  }
1284 
1285  /* If fencing and shutdown locks are disabled and we're not processing
1286  * unseen nodes, then we don't want to unpack offline nodes until online
1287  * nodes have been unpacked. This allows us to number active clone
1288  * instances first.
1289  */
1290  } else if (!pcmk_any_flags_set(scheduler->flags,
1291  pcmk_sched_fencing_enabled
1292  |pcmk_sched_shutdown_lock)
1293  && !this_node->details->online) {
1294  crm_trace("Not unpacking resource history for offline "
1295  "cluster node %s", id);
1296  continue;
1297  }
1298 
1299  if (pe__is_guest_or_remote_node(this_node)) {
1300  determine_remote_online_status(scheduler, this_node);
1301  unpack_handle_remote_attrs(this_node, state, scheduler);
1302  }
1303 
1304  crm_trace("Unpacking resource history for %snode %s",
1305  (fence? "unseen " : ""), id);
1306 
1307  this_node->details->unpacked = TRUE;
1308  unpack_node_lrm(this_node, state, scheduler);
1309 
1310  rc = EAGAIN; // Other node histories might depend on this one
1311  }
1312  return rc;
1313 }
1314 
1315 /* remove nodes that are down, stopping */
1316 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1317 /* anything else? */
1318 gboolean
1319 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1320 {
1321  xmlNode *state = NULL;
1322 
1323  crm_trace("Beginning unpack");
1324 
1325  if (scheduler->tickets == NULL) {
1327  }
1328 
1329  for (state = pcmk__xe_first_child(status); state != NULL;
1330  state = pcmk__xe_next(state)) {
1331 
1332  if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
1333  unpack_tickets_state((xmlNode *) state, scheduler);
1334 
1335  } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
1336  unpack_node_state(state, scheduler);
1337  }
1338  }
1339 
1340  while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1341  crm_trace("Another pass through node resource histories is needed");
1342  }
1343 
1344  // Now catch any nodes we didn't see
1345  unpack_node_history(status,
1348  scheduler);
1349 
1350  /* Now that we know where resources are, we can schedule stops of containers
1351  * with failed bundle connections
1352  */
1353  if (scheduler->stop_needed != NULL) {
1354  for (GList *item = scheduler->stop_needed; item; item = item->next) {
1355  pcmk_resource_t *container = item->data;
1356  pcmk_node_t *node = pe__current_node(container);
1357 
1358  if (node) {
1359  stop_action(container, node, FALSE);
1360  }
1361  }
1362  g_list_free(scheduler->stop_needed);
1363  scheduler->stop_needed = NULL;
1364  }
1365 
1366  /* Now that we know status of all Pacemaker Remote connections and nodes,
1367  * we can stop connections for node shutdowns, and check the online status
1368  * of remote/guest nodes that didn't have any node history to unpack.
1369  */
1370  for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1371  pcmk_node_t *this_node = gIter->data;
1372 
1373  if (!pe__is_guest_or_remote_node(this_node)) {
1374  continue;
1375  }
1376  if (this_node->details->shutdown
1377  && (this_node->details->remote_rsc != NULL)) {
1378  pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
1379  "remote shutdown");
1380  }
1381  if (!this_node->details->unpacked) {
1382  determine_remote_online_status(scheduler, this_node);
1383  }
1384  }
1385 
1386  return TRUE;
1387 }
1388 
1400 static long long
1401 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1402 {
1403  const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1404  int member = 0;
1405 
1406  if (member_time == NULL) {
1407  return -1LL;
1408 
1409  } else if (crm_str_to_boolean(member_time, &member) == 1) {
1410  /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
1411  * recorded as a boolean for a DC < 2.1.7, or the node is pending
1412  * shutdown and has left the CPG, in which case it was set to 1 to avoid
1413  * fencing for node-pending-timeout.
1414  *
1415  * We return the effective time for in_ccm=1 because what's important to
1416  * avoid fencing is that effective time minus this value is less than
1417  * the pending node timeout.
1418  */
1419  return member? (long long) get_effective_time(scheduler) : 0LL;
1420 
1421  } else {
1422  long long when_member = 0LL;
1423 
1424  if ((pcmk__scan_ll(member_time, &when_member,
1425  0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1426  crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1427  " in " XML_CIB_TAG_STATE " entry", member_time);
1428  return -1LL;
1429  }
1430  return when_member;
1431  }
1432 }
1433 
1443 static long long
1444 unpack_node_online(const xmlNode *node_state)
1445 {
1446  const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD);
1447 
1448  // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
1449  if (pcmk__str_eq(peer_time, OFFLINESTATUS,
1450  pcmk__str_casei|pcmk__str_null_matches)) {
1451  return 0LL;
1452 
1453  } else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) {
1454  return 1LL;
1455 
1456  } else {
1457  long long when_online = 0LL;
1458 
1459  if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1460  || (when_online < 0)) {
1461  crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in "
1462  XML_CIB_TAG_STATE " entry, assuming offline", peer_time);
1463  return 0LL;
1464  }
1465  return when_online;
1466  }
1467 }
1468 
1478 static bool
1479 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1480 {
1481  long long value = 0LL;
1482  int value_i = 0;
1483  const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE);
1484 
1485  // Value may be boolean or an epoch time
1486  if (crm_str_to_boolean(value_s, &value_i) == 1) {
1487  return (value_i != 0);
1488  }
1489  if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
1490  return (value > 0);
1491  }
1492  crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1493  "node attribute for %s", value_s, pe__node_name(node));
1494  return false;
1495 }
1496 
1497 static gboolean
1498 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1499  const xmlNode *node_state,
1500  pcmk_node_t *this_node)
1501 {
1502  gboolean online = FALSE;
1503  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1504  const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
1505  long long when_member = unpack_node_member(node_state, scheduler);
1506  long long when_online = unpack_node_online(node_state);
1507 
1508  if (when_member <= 0) {
1509  crm_trace("Node %s is %sdown", pe__node_name(this_node),
1510  ((when_member < 0)? "presumed " : ""));
1511 
1512  } else if (when_online > 0) {
1513  if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1514  online = TRUE;
1515  } else {
1516  crm_debug("Node %s is not ready to run resources: %s",
1517  pe__node_name(this_node), join);
1518  }
1519 
1520  } else if (this_node->details->expected_up == FALSE) {
1521  crm_trace("Node %s controller is down: "
1522  "member@%lld online@%lld join=%s expected=%s",
1523  pe__node_name(this_node), when_member, when_online,
1524  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1525 
1526  } else {
1527  /* mark it unclean */
1528  pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1529  crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1530  pe__node_name(this_node), when_member, when_online,
1531  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1532  }
1533  return online;
1534 }
1535 
1549 static inline bool
1550 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1551  long long when_member, long long when_online)
1552 {
1553  if ((scheduler->node_pending_timeout > 0)
1554  && (when_member > 0) && (when_online <= 0)) {
1555  // There is a timeout on pending nodes, and node is pending
1556 
1557  time_t timeout = when_member + scheduler->node_pending_timeout;
1558 
1559  if (get_effective_time(node->details->data_set) >= timeout) {
1560  return true; // Node has timed out
1561  }
1562 
1563  // Node is pending, but still has time
1564  pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1565  }
1566  return false;
1567 }
1568 
1569 static bool
1570 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1571  const xmlNode *node_state,
1572  pcmk_node_t *this_node)
1573 {
1574  bool termination_requested = unpack_node_terminate(this_node, node_state);
1575  const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1576  const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
1577  long long when_member = unpack_node_member(node_state, scheduler);
1578  long long when_online = unpack_node_online(node_state);
1579 
1580 /*
1581  - PCMK__XA_JOIN ::= member|down|pending|banned
1582  - PCMK__XA_EXPECTED ::= member|down
1583 
1584  @COMPAT with entries recorded for DCs < 2.1.7
1585  - PCMK__XA_IN_CCM ::= true|false
1586  - PCMK__XA_CRMD ::= online|offline
1587 
1588  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
1589  - PCMK__XA_IN_CCM ::= <timestamp>|0
1590  Since when the node has been a cluster member. A value of 0 means the node
1591  is not a cluster member.
1592 
1593  - PCMK__XA_CRMD ::= <timestamp>|0
1594  Since when peer has been online in CPG. A value 0 means the peer is offline
1595  in CPG.
1596 */
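/* Putting that together, a post-2.1.7 node_state entry for a healthy member
 * would look something like this (illustrative values only):
 *
 *   <node_state id="101" uname="cluster-node-1" in_ccm="1680000000"
 *               crmd="1680000010" join="member" expected="member"/>
 *
 * while in_ccm="0" or crmd="0" means the node has left the membership or its
 * controller is no longer available, respectively.
 */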
1597 
1598  crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1599  pe__node_name(this_node), when_member, when_online,
1600  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1601  (termination_requested? " (termination requested)" : ""));
1602 
1603  if (this_node->details->shutdown) {
1604  crm_debug("%s is shutting down", pe__node_name(this_node));
1605 
1606  /* Slightly different criteria since we can't shut down a dead peer */
1607  return (when_online > 0);
1608  }
1609 
1610  if (when_member < 0) {
1611  pe_fence_node(scheduler, this_node,
1612  "peer has not been seen by the cluster", FALSE);
1613  return false;
1614  }
1615 
1616  if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1617  pe_fence_node(scheduler, this_node,
1618  "peer failed Pacemaker membership criteria", FALSE);
1619 
1620  } else if (termination_requested) {
1621  if ((when_member <= 0) && (when_online <= 0)
1622  && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1623  crm_info("%s was fenced as requested", pe__node_name(this_node));
1624  return false;
1625  }
1626  pe_fence_node(scheduler, this_node, "fencing was requested", false);
1627 
1628  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1629  pcmk__str_null_matches|pcmk__str_none)) {
1630 
1631  if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1632  pe_fence_node(scheduler, this_node,
1633  "peer pending timed out on joining the process group",
1634  FALSE);
1635 
1636  } else if ((when_member > 0) || (when_online > 0)) {
1637  crm_info("- %s is not ready to run resources",
1638  pe__node_name(this_node));
1639  this_node->details->standby = TRUE;
1640  this_node->details->pending = TRUE;
1641 
1642  } else {
1643  crm_trace("%s is down or still coming up",
1644  pe__node_name(this_node));
1645  }
1646 
1647  } else if (when_member <= 0) {
1648  // Consider `priority-fencing-delay` for lost nodes
1649  pe_fence_node(scheduler, this_node,
1650  "peer is no longer part of the cluster", TRUE);
1651 
1652  } else if (when_online <= 0) {
1653  pe_fence_node(scheduler, this_node,
1654  "peer process is no longer available", FALSE);
1655 
1656  /* Everything is running at this point, now check join state */
1657 
1658  } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1659  crm_info("%s is active", pe__node_name(this_node));
1660 
1661  } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1662  CRMD_JOINSTATE_DOWN, NULL)) {
1663  crm_info("%s is not ready to run resources", pe__node_name(this_node));
1664  this_node->details->standby = TRUE;
1665  this_node->details->pending = TRUE;
1666 
1667  } else {
1668  pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1669  FALSE);
1670  }
1671 
1672  return (when_member > 0);
1673 }
1674 
1675 static void
1676 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1677  pcmk_node_t *this_node)
1678 {
1679  pcmk_resource_t *rsc = this_node->details->remote_rsc;
1680  pcmk_resource_t *container = NULL;
1681  pcmk_node_t *host = NULL;
1682 
1683  /* If there is a node state entry for a (former) Pacemaker Remote node
1684  * but no resource creating that node, the node's connection resource will
1685  * be NULL. Consider it an offline remote node in that case.
1686  */
1687  if (rsc == NULL) {
1688  this_node->details->online = FALSE;
1689  goto remote_online_done;
1690  }
1691 
1692  container = rsc->container;
1693 
1694  if (container && pcmk__list_of_1(rsc->running_on)) {
1695  host = rsc->running_on->data;
1696  }
1697 
1698  /* If the resource is currently started, mark it online. */
1699  if (rsc->role == pcmk_role_started) {
1700  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1701  (container? "Guest" : "Remote"), this_node->details->id);
1702  this_node->details->online = TRUE;
1703  }
1704 
1705  /* consider this node shutting down if transitioning start->stop */
1706  if ((rsc->role == pcmk_role_started)
1707  && (rsc->next_role == pcmk_role_stopped)) {
1708 
1709  crm_trace("%s node %s shutting down because connection resource is stopping",
1710  (container? "Guest" : "Remote"), this_node->details->id);
1711  this_node->details->shutdown = TRUE;
1712  }
1713 
1714  /* Now check all the failure conditions. */
1715  if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1716  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1717  this_node->details->id);
1718  this_node->details->online = FALSE;
1719  this_node->details->remote_requires_reset = TRUE;
1720 
1721  } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1722  crm_trace("%s node %s OFFLINE because connection resource failed",
1723  (container? "Guest" : "Remote"), this_node->details->id);
1724  this_node->details->online = FALSE;
1725 
1726  } else if ((rsc->role == pcmk_role_stopped)
1727  || ((container != NULL)
1728  && (container->role == pcmk_role_stopped))) {
1729 
1730  crm_trace("%s node %s OFFLINE because its resource is stopped",
1731  (container? "Guest" : "Remote"), this_node->details->id);
1732  this_node->details->online = FALSE;
1733  this_node->details->remote_requires_reset = FALSE;
1734 
1735  } else if (host && (host->details->online == FALSE)
1736  && host->details->unclean) {
1737  crm_trace("Guest node %s UNCLEAN because host is unclean",
1738  this_node->details->id);
1739  this_node->details->online = FALSE;
1740  this_node->details->remote_requires_reset = TRUE;
1741  }
1742 
1743 remote_online_done:
1744  crm_trace("Remote node %s online=%s",
1745  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1746 }
1747 
1748 static void
1749 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1750  pcmk_scheduler_t *scheduler)
1751 {
1752  gboolean online = FALSE;
1753  const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
1754 
1755  CRM_CHECK(this_node != NULL, return);
1756 
1757  this_node->details->shutdown = FALSE;
1758  this_node->details->expected_up = FALSE;
1759 
1760  if (pe__shutdown_requested(this_node)) {
1761  this_node->details->shutdown = TRUE;
1762 
1763  } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1764  this_node->details->expected_up = TRUE;
1765  }
1766 
1767  if (this_node->details->type == node_ping) {
1768  this_node->details->unclean = FALSE;
1769  online = FALSE; /* As far as resource management is concerned,
1770  * the node is safely offline.
1771  * Anyone caught abusing this logic will be shot
1772  */
1773 
1774  } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1775  online = determine_online_status_no_fencing(scheduler, node_state,
1776  this_node);
1777 
1778  } else {
1779  online = determine_online_status_fencing(scheduler, node_state,
1780  this_node);
1781  }
1782 
1783  if (online) {
1784  this_node->details->online = TRUE;
1785 
1786  } else {
1787  /* remove node from contention */
1788  this_node->fixed = TRUE; // @COMPAT deprecated and unused
1789  this_node->weight = -INFINITY;
1790  }
1791 
1792  if (online && this_node->details->shutdown) {
1793  /* don't run resources here */
1794  this_node->fixed = TRUE; // @COMPAT deprecated and unused
1795  this_node->weight = -INFINITY;
1796  }
1797 
1798  if (this_node->details->type == node_ping) {
1799  crm_info("%s is not a Pacemaker node", pe__node_name(this_node));
1800 
1801  } else if (this_node->details->unclean) {
1802  pe_proc_warn("%s is unclean", pe__node_name(this_node));
1803 
1804  } else if (this_node->details->online) {
1805  crm_info("%s is %s", pe__node_name(this_node),
1806  this_node->details->shutdown ? "shutting down" :
1807  this_node->details->pending ? "pending" :
1808  this_node->details->standby ? "standby" :
1809  this_node->details->maintenance ? "maintenance" : "online");
1810 
1811  } else {
1812  crm_trace("%s is offline", pe__node_name(this_node));
1813  }
1814 }
1815 
1824 const char *
1825 pe_base_name_end(const char *id)
1826 {
1827  if (!pcmk__str_empty(id)) {
1828  const char *end = id + strlen(id) - 1;
1829 
1830  for (const char *s = end; s > id; --s) {
1831  switch (*s) {
1832  case '0':
1833  case '1':
1834  case '2':
1835  case '3':
1836  case '4':
1837  case '5':
1838  case '6':
1839  case '7':
1840  case '8':
1841  case '9':
1842  break;
1843  case ':':
1844  return (s == end)? s : (s - 1);
1845  default:
1846  return end;
1847  }
1848  }
1849  return end;
1850  }
1851  return NULL;
1852 }
1853 
1864 char *
1865 clone_strip(const char *last_rsc_id)
1866 {
1867  const char *end = pe_base_name_end(last_rsc_id);
1868  char *basename = NULL;
1869 
1870  CRM_ASSERT(end);
1871  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1872  CRM_ASSERT(basename);
1873  return basename;
1874 }
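/* Examples (illustrative): clone_strip("myclone:3") returns "myclone", while
 * clone_strip("myrsc") returns "myrsc" unchanged, since pe_base_name_end()
 * only strips a trailing ":<instance-number>".
 */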
1875 
1886 char *
1887 clone_zero(const char *last_rsc_id)
1888 {
1889  const char *end = pe_base_name_end(last_rsc_id);
1890  size_t base_name_len = end - last_rsc_id + 1;
1891  char *zero = NULL;
1892 
1893  CRM_ASSERT(end);
1894  zero = calloc(base_name_len + 3, sizeof(char));
1895  CRM_ASSERT(zero);
1896  memcpy(zero, last_rsc_id, base_name_len);
1897  zero[base_name_len] = ':';
1898  zero[base_name_len + 1] = '0';
1899  return zero;
1900 }
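/* Examples (illustrative): clone_zero("myclone:3") returns "myclone:0", and
 * clone_zero("myclone") also returns "myclone:0", appending the ":0" suffix
 * to the base name.
 */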
1901 
1902 static pcmk_resource_t *
1903 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1904  pcmk_scheduler_t *scheduler)
1905 {
1906  pcmk_resource_t *rsc = NULL;
1907  xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1908 
1909  copy_in_properties(xml_rsc, rsc_entry);
1910  crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1911  crm_log_xml_debug(xml_rsc, "Orphan resource");
1912 
1913  if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
1914  return NULL;
1915  }
1916 
1917  if (xml_contains_remote_node(xml_rsc)) {
1918  pcmk_node_t *node;
1919 
1920  crm_debug("Detected orphaned remote node %s", rsc_id);
1921  node = pe_find_node(scheduler->nodes, rsc_id);
1922  if (node == NULL) {
1923  node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler);
1924  }
1925  link_rsc2remotenode(scheduler, rsc);
1926 
1927  if (node) {
1928  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1929  node->details->shutdown = TRUE;
1930  }
1931  }
1932 
1933  if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1934  /* This orphaned rsc needs to be mapped to a container. */
1935  crm_trace("Detected orphaned container filler %s", rsc_id);
1936  pe__set_resource_flags(rsc, pcmk_rsc_removed_filler);
1937  }
1938  pe__set_resource_flags(rsc, pcmk_rsc_removed);
1939  scheduler->resources = g_list_append(scheduler->resources, rsc);
1940  return rsc;
1941 }
1942 
1954 static pcmk_resource_t *
1955 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
1956  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
1957 {
1958  pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
1959 
1960  // find_rsc() because we might be a cloned group
1961  pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
1962  pcmk_rsc_match_clone_only);
1963 
1964  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1965  top->id, parent->id, rsc_id, pe__node_name(node));
1966  return orphan;
1967 }
1968 
1983 static pcmk_resource_t *
1984 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1985  pcmk_resource_t *parent, const char *rsc_id)
1986 {
1987  GList *rIter = NULL;
1988  pcmk_resource_t *rsc = NULL;
1989  pcmk_resource_t *inactive_instance = NULL;
1990  gboolean skip_inactive = FALSE;
1991 
1992  CRM_ASSERT(parent != NULL);
1993  CRM_ASSERT(pe_rsc_is_clone(parent));
1994  CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique));
1995 
1996  // Check for active (or partially active, for cloned groups) instance
1997  pe_rsc_trace(parent, "Looking for %s on %s in %s",
1998  rsc_id, pe__node_name(node), parent->id);
1999  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2000  GList *locations = NULL;
2001  pcmk_resource_t *child = rIter->data;
2002 
2003  /* Check whether this instance is already known to be active or pending
2004  * anywhere, at this stage of unpacking. Because this function is called
2005  * for a resource before the resource's individual operation history
2006  * entries are unpacked, locations will generally not contain the
2007  * desired node.
2008  *
2009  * However, there are three exceptions:
2010  * (1) when child is a cloned group and we have already unpacked the
2011  * history of another member of the group on the same node;
2012  * (2) when we've already unpacked the history of another numbered
2013  * instance on the same node (which can happen if globally-unique
2014  * was flipped from true to false); and
2015  * (3) when we re-run calculations on the same scheduler data as part of
2016  * a simulation.
2017  */
2018  child->fns->location(child, &locations, 2);
2019  if (locations) {
2020  /* We should never associate the same numbered anonymous clone
2021  * instance with multiple nodes, and clone instances can't migrate,
2022  * so there must be only one location, regardless of history.
2023  */
2024  CRM_LOG_ASSERT(locations->next == NULL);
2025 
2026  if (((pcmk_node_t *) locations->data)->details == node->details) {
2027  /* This child instance is active on the requested node, so check
2028  * for a corresponding configured resource. We use find_rsc()
2029  * instead of child because child may be a cloned group, and we
2030  * need the particular member corresponding to rsc_id.
2031  *
2032  * If the history entry is orphaned, rsc will be NULL.
2033  */
2034  rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2035  pcmk_rsc_match_clone_only);
2036  if (rsc) {
2037  /* If there are multiple instance history entries for an
2038  * anonymous clone in a single node's history (which can
2039  * happen if globally-unique is switched from true to
2040  * false), we want to consider the instances beyond the
2041  * first as orphans, even if there are inactive instance
2042  * numbers available.
2043  */
2044  if (rsc->running_on) {
2045  crm_notice("Active (now-)anonymous clone %s has "
2046  "multiple (orphan) instance histories on %s",
2047  parent->id, pe__node_name(node));
2048  skip_inactive = TRUE;
2049  rsc = NULL;
2050  } else {
2051  pe_rsc_trace(parent, "Resource %s, active", rsc->id);
2052  }
2053  }
2054  }
2055  g_list_free(locations);
2056 
2057  } else {
2058  pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
2059  if (!skip_inactive && !inactive_instance
2060  && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2061  // Remember one inactive instance in case we don't find active
2062  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2063  pcmk_rsc_match_clone_only);
2064 
2065  /* ... but don't use it if it was already associated with a
2066  * pending action on another node
2067  */
2068  if (inactive_instance && inactive_instance->pending_node
2069  && (inactive_instance->pending_node->details != node->details)) {
2070  inactive_instance = NULL;
2071  }
2072  }
2073  }
2074  }
2075 
2076  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2077  pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
2078  rsc = inactive_instance;
2079  }
2080 
2081  /* If the resource has "requires" set to "quorum" or "nothing", and we don't
2082  * have a clone instance for every node, we don't want to consume a valid
2083  * instance number for unclean nodes. Such instances may appear to be active
2084  * according to the history, but should be considered inactive, so we can
2085  * start an instance elsewhere. Treat such instances as orphans.
2086  *
2087  * An exception is instances running on guest nodes -- since guest node
2088  * "fencing" is actually just a resource stop, requires shouldn't apply.
2089  *
2090  * @TODO Ideally, we'd use an inactive instance number if it is not needed
2091  * for any clean instances. However, we don't know that at this point.
2092  */
2093  if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2094  && (!node->details->online || node->details->unclean)
2095  && !pe__is_guest_node(node)
2096  && !pe__is_universal_clone(parent, scheduler)) {
2097 
2098  rsc = NULL;
2099  }
2100 
2101  if (rsc == NULL) {
2102  rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2103  pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
2104  }
2105  return rsc;
2106 }
2107 
2108 static pcmk_resource_t *
2109 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2110  const char *rsc_id)
2111 {
2112  pcmk_resource_t *rsc = NULL;
2113  pcmk_resource_t *parent = NULL;
2114 
2115  crm_trace("looking for %s", rsc_id);
2116  rsc = pe_find_resource(scheduler->resources, rsc_id);
2117 
2118  if (rsc == NULL) {
2119  /* If we didn't find the resource by its name in the operation history,
2120  * check it again as a clone instance. Even when clone-max=0, we create
2121  * a single :0 orphan to match against here.
2122  */
2123  char *clone0_id = clone_zero(rsc_id);
2124  pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2125  clone0_id);
2126 
2127  if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2128  rsc = clone0;
2129  parent = uber_parent(clone0);
2130  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2131  } else {
2132  crm_trace("%s is not known as %s either (orphan)",
2133  rsc_id, clone0_id);
2134  }
2135  free(clone0_id);
2136 
2137  } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2138  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2139  rsc_id);
2140  return NULL;
2141 
2142  } else {
2143  parent = uber_parent(rsc);
2144  }
2145 
2146  if (pe_rsc_is_anon_clone(parent)) {
2147 
2148  if (pe_rsc_is_bundled(parent)) {
2149  rsc = pe__find_bundle_replica(parent->parent, node);
2150  } else {
2151  char *base = clone_strip(rsc_id);
2152 
2153  rsc = find_anonymous_clone(scheduler, node, parent, base);
2154  free(base);
2155  CRM_ASSERT(rsc != NULL);
2156  }
2157  }
2158 
2159  if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
2160  && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
2161 
2162  pcmk__str_update(&rsc->clone_name, rsc_id);
2163  pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2164  rsc_id, pe__node_name(node), rsc->id,
2165  (pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : ""));
2166  }
2167  return rsc;
2168 }
2169 
2170 static pcmk_resource_t *
2171 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2172  pcmk_scheduler_t *scheduler)
2173 {
2174  pcmk_resource_t *rsc = NULL;
2175  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2176 
2177  crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node));
2178  rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2179  if (rsc == NULL) {
2180  return NULL;
2181  }
2182 
2183  if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
2184  pe__clear_resource_flags(rsc, pcmk_rsc_managed);
2185 
2186  } else {
2187  CRM_CHECK(rsc != NULL, return NULL);
2188  pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
2189  resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__",
2190  scheduler);
2191  }
2192  return rsc;
2193 }
2194 
2195 static void
2196 process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
2197  enum action_fail_response on_fail)
2198 {
2199  pcmk_node_t *tmpnode = NULL;
2200  char *reason = NULL;
2201  enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
2202 
2203  CRM_ASSERT(rsc);
2204  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
2205  rsc->id, role2text(rsc->role), pe__node_name(node),
2206  fail2text(on_fail));
2207 
2208  /* process current state */
2209  if (rsc->role != pcmk_role_unknown) {
2210  pcmk_resource_t *iter = rsc;
2211 
2212  while (iter) {
2213  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
2214  pcmk_node_t *n = pe__copy_node(node);
2215 
2216  pe_rsc_trace(rsc, "%s%s%s known on %s",
2217  rsc->id,
2218  ((rsc->clone_name == NULL)? "" : " also known as "),
2219  ((rsc->clone_name == NULL)? "" : rsc->clone_name),
2220  pe__node_name(n));
2221  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
2222  }
2223  if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
2224  break;
2225  }
2226  iter = iter->parent;
2227  }
2228  }
2229 
2230  /* If a managed resource is believed to be running, but node is down ... */
2231  if ((rsc->role > pcmk_role_stopped)
2232  && node->details->online == FALSE
2233  && node->details->maintenance == FALSE
2234  && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2235 
2236  gboolean should_fence = FALSE;
2237 
2238  /* If this is a guest node, fence it (regardless of whether fencing is
2239  * enabled, because guest node fencing is done by recovery of the
2240  * container resource rather than by the fencer). Mark the resource
2241  * we're processing as failed. When the guest comes back up, its
2242  * operation history in the CIB will be cleared, freeing the affected
2243  * resource to run again once we are sure we know its state.
2244  */
2245  if (pe__is_guest_node(node)) {
2246  pe__set_resource_flags(rsc,
2247  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2248  should_fence = TRUE;
2249 
2250  } else if (pcmk_is_set(rsc->cluster->flags,
2251  pcmk_sched_fencing_enabled)) {
2252  if (pe__is_remote_node(node) && node->details->remote_rsc
2253  && !pcmk_is_set(node->details->remote_rsc->flags,
2254  pcmk_rsc_failed)) {
2255 
2256  /* Setting unseen means that fencing of the remote node will
2257  * occur only if the connection resource is not going to start
2258  * somewhere. This allows connection resources on a failed
2259  * cluster node to move to another node without requiring the
2260  * remote nodes to be fenced as well.
2261  */
2262  node->details->unseen = TRUE;
2263  reason = crm_strdup_printf("%s is active there (fencing will be"
2264  " revoked if remote connection can "
2265  "be re-established elsewhere)",
2266  rsc->id);
2267  }
2268  should_fence = TRUE;
2269  }
2270 
2271  if (should_fence) {
2272  if (reason == NULL) {
2273  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2274  }
2275  pe_fence_node(rsc->cluster, node, reason, FALSE);
2276  }
2277  free(reason);
2278  }
2279 
2280  /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
2281  save_on_fail = on_fail;
2282 
2283  if (node->details->unclean) {
2284  /* No extra processing needed
2285  * Also allows resources to be started again after a node is shot
2286  */
2287  on_fail = pcmk_on_fail_ignore;
2288  }
2289 
2290  switch (on_fail) {
2291  case pcmk_on_fail_ignore:
2292  /* nothing to do */
2293  break;
2294 
2295  case pcmk_on_fail_demote:
2296  pe__set_resource_flags(rsc, pcmk_rsc_failed);
2297  demote_action(rsc, node, FALSE);
2298  break;
2299 
2300  case pcmk_on_fail_fence_node:
2301  /* treat it as if it is still running
2302  * but also mark the node as unclean
2303  */
2304  reason = crm_strdup_printf("%s failed there", rsc->id);
2305  pe_fence_node(rsc->cluster, node, reason, FALSE);
2306  free(reason);
2307  break;
2308 
2309  case pcmk_on_fail_standby_node:
2310  node->details->standby = TRUE;
2311  node->details->standby_onfail = TRUE;
2312  break;
2313 
2314  case pcmk_on_fail_block:
2315  /* is_managed == FALSE will prevent any
2316  * actions being sent for the resource
2317  */
2318  pe__clear_resource_flags(rsc, pcmk_rsc_managed);
2319  pe__set_resource_flags(rsc, pcmk_rsc_blocked);
2320  break;
2321 
2322  case pcmk_on_fail_ban:
2323  /* make sure it comes up somewhere else
2324  * or not at all
2325  */
2326  resource_location(rsc, node, -INFINITY, "__action_migration_auto__",
2327  rsc->cluster);
2328  break;
2329 
2330  case pcmk_on_fail_stop:
2331  pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop");
2332  break;
2333 
2334  case pcmk_on_fail_restart:
2335  if ((rsc->role != pcmk_role_stopped)
2336  && (rsc->role != pcmk_role_unknown)) {
2337  pe__set_resource_flags(rsc,
2338  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2339  stop_action(rsc, node, FALSE);
2340  }
2341  break;
2342 
2343  case pcmk_on_fail_restart_container:
2344  pe__set_resource_flags(rsc,
2345  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2346  if (rsc->container && pe_rsc_is_bundled(rsc)) {
2347  /* A bundle's remote connection can run on a different node than
2348  * the bundle's container. We don't necessarily know where the
2349  * container is running yet, so remember it and add a stop
2350  * action for it later.
2351  */
2352  rsc->cluster->stop_needed =
2353  g_list_prepend(rsc->cluster->stop_needed, rsc->container);
2354  } else if (rsc->container) {
2355  stop_action(rsc->container, node, FALSE);
2356  } else if ((rsc->role != pcmk_role_stopped)
2357  && (rsc->role != pcmk_role_unknown)) {
2358  stop_action(rsc, node, FALSE);
2359  }
2360  break;
2361 
2362  case pcmk_on_fail_reset_remote:
2363  pe__set_resource_flags(rsc,
2364  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2365  if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
2366  tmpnode = NULL;
2367  if (rsc->is_remote_node) {
2368  tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
2369  }
2370  if (tmpnode &&
2371  pe__is_remote_node(tmpnode) &&
2372  tmpnode->details->remote_was_fenced == 0) {
2373 
2374  /* The remote connection resource failed in a way that
2375  * should result in fencing the remote node.
2376  */
2377  pe_fence_node(rsc->cluster, tmpnode,
2378  "remote connection is unrecoverable", FALSE);
2379  }
2380  }
2381 
2382  /* require the stop action regardless of whether fencing is occurring */
2383  if (rsc->role > pcmk_role_stopped) {
2384  stop_action(rsc, node, FALSE);
2385  }
2386 
2387  /* if reconnect delay is in use, prevent the connection from exiting the
2388  * "STOPPED" role until the failure is cleared by the delay timeout. */
2389  if (rsc->remote_reconnect_ms) {
2390  pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
2391  }
2392  break;
2393  }
2394 
2395  /* Ensure a remote-node connection failure forces an unclean remote node
2396  * to be fenced. By setting unseen = FALSE, the remote-node failure will
2397  * result in a fencing operation regardless of whether we're going to
2398  * attempt to reconnect to the remote node in this transition. */
2399  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
2400  tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
2401  if (tmpnode && tmpnode->details->unclean) {
2402  tmpnode->details->unseen = FALSE;
2403  }
2404  }
2405 
2406  if ((rsc->role != pcmk_role_stopped)
2407  && (rsc->role != pcmk_role_unknown)) {
2408  if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
2409  if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
2410  pcmk__config_warn("Detected active orphan %s running on %s",
2411  rsc->id, pe__node_name(node));
2412  } else {
2413  pcmk__config_warn("Resource '%s' must be stopped manually on "
2414  "%s because cluster is configured not to "
2415  "stop active orphans",
2416  rsc->id, pe__node_name(node));
2417  }
2418  }
2419 
2420  native_add_running(rsc, node, rsc->cluster,
2421  (save_on_fail != pcmk_on_fail_ignore));
2422  switch (on_fail) {
2423  case pcmk_on_fail_ignore:
2424  break;
2425  case pcmk_on_fail_demote:
2426  case pcmk_on_fail_block:
2427  pe__set_resource_flags(rsc, pcmk_rsc_failed);
2428  break;
2429  default:
2430  pe__set_resource_flags(rsc,
2431  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
2432  break;
2433  }
2434 
2435  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2436  /* Only do this for older status sections that included instance numbers.
2437  * Otherwise, stopped instances will appear as orphans.
2438  */
2439  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2440  free(rsc->clone_name);
2441  rsc->clone_name = NULL;
2442 
2443  } else {
2444  GList *possible_matches = pe__resource_actions(rsc, node,
2445  PCMK_ACTION_STOP, FALSE);
2446  GList *gIter = possible_matches;
2447 
2448  for (; gIter != NULL; gIter = gIter->next) {
2449  pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
2450 
2451  pe__set_action_flags(stop, pcmk_action_optional);
2452  }
2453 
2454  g_list_free(possible_matches);
2455  }
2456 
2457  /* A successful stop after migrate_to on the migration source doesn't make
2458  * the partially migrated resource stopped on the migration target.
2459  */
2460  if ((rsc->role == pcmk_role_stopped)
2461  && rsc->partial_migration_source
2462  && rsc->partial_migration_source->details == node->details
2463  && rsc->partial_migration_target
2464  && rsc->running_on) {
2465 
2466  rsc->role = pcmk_role_started;
2467  }
2468 }
2469 
2470 /* create active recurring operations as optional */
2471 static void
2472 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2473  int start_index, int stop_index,
2474  GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2475 {
2476  int counter = -1;
2477  const char *task = NULL;
2478  const char *status = NULL;
2479  GList *gIter = sorted_op_list;
2480 
2481  CRM_ASSERT(rsc);
2482  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2483 
2484  for (; gIter != NULL; gIter = gIter->next) {
2485  xmlNode *rsc_op = (xmlNode *) gIter->data;
2486 
2487  guint interval_ms = 0;
2488  char *key = NULL;
2489  const char *id = ID(rsc_op);
2490 
2491  counter++;
2492 
2493  if (node->details->online == FALSE) {
2494  pe_rsc_trace(rsc, "Skipping %s on %s: node is offline",
2495  rsc->id, pe__node_name(node));
2496  break;
2497 
2498  /* Need to check if there's a monitor for role="Stopped" */
2499  } else if (start_index < stop_index && counter <= stop_index) {
2500  pe_rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2501  id, pe__node_name(node));
2502  continue;
2503 
2504  } else if (counter < start_index) {
2505  pe_rsc_trace(rsc, "Skipping %s on %s: old %d",
2506  id, pe__node_name(node), counter);
2507  continue;
2508  }
2509 
2510  crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2511  if (interval_ms == 0) {
2512  pe_rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2513  id, pe__node_name(node));
2514  continue;
2515  }
2516 
2517  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2518  if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2519  pe_rsc_trace(rsc, "Skipping %s on %s: status",
2520  id, pe__node_name(node));
2521  continue;
2522  }
2523  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2524  /* create the action */
2525  key = pcmk__op_key(rsc->id, task, interval_ms);
2526  pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node));
2527  custom_action(rsc, key, task, node, TRUE, scheduler);
2528  }
2529 }
2530 
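/* Illustrative note (not from the original source): pcmk__op_key() builds
 * operation keys of the form "<rsc>_<task>_<interval_ms>", so the recurring
 * 10-second monitor of a hypothetical resource "db" recreated above would
 * use the key "db_monitor_10000", matching the keys stored in lrm_rsc_op
 * history entries.
 */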
2531 void
2532 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2533  int *stop_index)
2534 {
2535  int counter = -1;
2536  int implied_monitor_start = -1;
2537  int implied_clone_start = -1;
2538  const char *task = NULL;
2539  const char *status = NULL;
2540 
2541  *stop_index = -1;
2542  *start_index = -1;
2543 
2544  for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2545  const xmlNode *rsc_op = (const xmlNode *) iter->data;
2546 
2547  counter++;
2548 
2549  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2550  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2551 
2552  if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2553  && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2554  *stop_index = counter;
2555 
2556  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2557  PCMK_ACTION_MIGRATE_FROM, NULL)) {
2558  *start_index = counter;
2559 
2560  } else if ((implied_monitor_start <= *stop_index)
2561  && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2562  pcmk__str_casei)) {
2563  const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2564 
2565  if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2566  implied_monitor_start = counter;
2567  }
2568  } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2569  PCMK_ACTION_DEMOTE, NULL)) {
2570  implied_clone_start = counter;
2571  }
2572  }
2573 
2574  if (*start_index == -1) {
2575  if (implied_clone_start != -1) {
2576  *start_index = implied_clone_start;
2577  } else if (implied_monitor_start != -1) {
2578  *start_index = implied_monitor_start;
2579  }
2580  }
2581 }
2582 
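/* Illustrative sketch (not from the original source): a minimal, test-style
 * call to calculate_active_ops(). It assumes the libcrmcommon helpers
 * create_xml_node(), crm_xml_add() and free_xml() used elsewhere in this
 * file; the operation sequence itself is hypothetical.
 */
static void
calculate_active_ops_example(void)
{
    /* Sorted history: start, monitor, stop, start -- only the trailing
     * start should mark the beginning of the currently active period.
     */
    const char *tasks[] = {
        PCMK_ACTION_START, PCMK_ACTION_MONITOR, PCMK_ACTION_STOP,
        PCMK_ACTION_START,
    };
    GList *ops = NULL;
    int start_index = -1;
    int stop_index = -1;

    for (size_t i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++) {
        xmlNode *op = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);

        crm_xml_add(op, XML_LRM_ATTR_TASK, tasks[i]);
        crm_xml_add(op, XML_LRM_ATTR_OPSTATUS, "0"); // completed
        crm_xml_add(op, XML_LRM_ATTR_RC, "0");       // success
        ops = g_list_append(ops, op);
    }

    calculate_active_ops(ops, &start_index, &stop_index);
    // Expected here: stop_index == 2 (the stop), start_index == 3 (the restart)

    g_list_free_full(ops, (GDestroyNotify) free_xml);
}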
2583 // If resource history entry has shutdown lock, remember lock node and time
2584 static void
2585 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2586  const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2587 {
2588  time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2589 
2590  if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
2591  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2592 
2593  if ((scheduler->shutdown_lock > 0)
2594  && (get_effective_time(scheduler)
2595  > (lock_time + scheduler->shutdown_lock))) {
2596  pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
2597  rsc->id, pe__node_name(node));
2598  pe__clear_resource_history(rsc, node);
2599  } else {
2600  /* @COMPAT I don't like breaking const signatures, but
2601  * rsc->lock_node should really be const -- we just can't change it
2602  * until the next API compatibility break.
2603  */
2604  rsc->lock_node = (pcmk_node_t *) node;
2605  rsc->lock_time = lock_time;
2606  }
2607  }
2608 }
2609 
2620 static pcmk_resource_t *
2621 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2622  pcmk_scheduler_t *scheduler)
2623 {
2624  GList *gIter = NULL;
2625  int stop_index = -1;
2626  int start_index = -1;
2627  enum rsc_role_e req_role = pcmk_role_unknown;
2628 
2629  const char *rsc_id = ID(lrm_resource);
2630 
2631  pcmk_resource_t *rsc = NULL;
2632  GList *op_list = NULL;
2633  GList *sorted_op_list = NULL;
2634 
2635  xmlNode *rsc_op = NULL;
2636  xmlNode *last_failure = NULL;
2637 
2638  enum action_fail_response on_fail = pcmk_on_fail_ignore;
2639  enum rsc_role_e saved_role = pcmk_role_unknown;
2640 
2641  if (rsc_id == NULL) {
2642  crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE
2643  " entry without id");
2644  return NULL;
2645  }
2646  crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s",
2647  rsc_id, pe__node_name(node));
2648 
2649  // Build a list of individual lrm_rsc_op entries, so we can sort them
2650  for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
2651  rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
2652 
2653  op_list = g_list_prepend(op_list, rsc_op);
2654  }
2655 
2656  if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2657  if (op_list == NULL) {
2658  // If there are no operations, there is nothing to do
2659  return NULL;
2660  }
2661  }
2662 
2663  /* find the resource */
2664  rsc = unpack_find_resource(scheduler, node, rsc_id);
2665  if (rsc == NULL) {
2666  if (op_list == NULL) {
2667  // If there are no operations, there is nothing to do
2668  return NULL;
2669  } else {
2670  rsc = process_orphan_resource(lrm_resource, node, scheduler);
2671  }
2672  }
2673  CRM_ASSERT(rsc != NULL);
2674 
2675  // Check whether the resource is "shutdown-locked" to this node
2676  if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2677  unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2678  }
2679 
2680  /* process operations */
2681  saved_role = rsc->role;
2682  rsc->role = pcmk_role_unknown;
2683  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2684 
2685  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2686  xmlNode *rsc_op = (xmlNode *) gIter->data;
2687 
2688  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2689  }
2690 
2691  /* create active recurring operations as optional */
2692  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2693  process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2694  scheduler);
2695 
2696  /* no need to free the contents */
2697  g_list_free(sorted_op_list);
2698 
2699  process_rsc_state(rsc, node, on_fail);
2700 
2701  if (get_target_role(rsc, &req_role)) {
2702  if ((rsc->next_role == pcmk_role_unknown)
2703  || (req_role < rsc->next_role)) {
2704 
2705  pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE);
2706 
2707  } else if (req_role > rsc->next_role) {
2708  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2709  " with requested next role %s",
2710  rsc->id, role2text(rsc->next_role), role2text(req_role));
2711  }
2712  }
2713 
2714  if (saved_role > rsc->role) {
2715  rsc->role = saved_role;
2716  }
2717 
2718  return rsc;
2719 }
2720 
2721 static void
2722 handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2723  pcmk_scheduler_t *scheduler)
2724 {
2725  for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list);
2726  rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2727 
2728  pcmk_resource_t *rsc;
2729  pcmk_resource_t *container;
2730  const char *rsc_id;
2731  const char *container_id;
2732 
2733  if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) {
2734  continue;
2735  }
2736 
2737  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2738  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2739  if (container_id == NULL || rsc_id == NULL) {
2740  continue;
2741  }
2742 
2743  container = pe_find_resource(scheduler->resources, container_id);
2744  if (container == NULL) {
2745  continue;
2746  }
2747 
2748  rsc = pe_find_resource(scheduler->resources, rsc_id);
2749  if ((rsc == NULL) || (rsc->container != NULL)
2750  || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2751  continue;
2752  }
2753 
2754  pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2755  rsc->id, container_id);
2756  rsc->container = container;
2757  container->fillers = g_list_append(container->fillers, rsc);
2758  }
2759 }
2760 
2769 static void
2770 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2771  pcmk_scheduler_t *scheduler)
2772 {
2773  bool found_orphaned_container_filler = false;
2774 
2775  // Drill down to lrm_resources section
2776  xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE);
2777  if (xml == NULL) {
2778  return;
2779  }
2780  xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE);
2781  if (xml == NULL) {
2782  return;
2783  }
2784 
2785  // Unpack each lrm_resource entry
2786  for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE);
2787  rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
2788 
2789  pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2790 
2791  if ((rsc != NULL)
2792  && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2793  found_orphaned_container_filler = true;
2794  }
2795  }
2796 
2797  /* Now that all resource state has been unpacked for this node, map any
2798  * orphaned container fillers to their container resource.
2799  */
2800  if (found_orphaned_container_filler) {
2801  handle_orphaned_container_fillers(xml, scheduler);
2802  }
2803 }
2804 
2805 static void
2806 set_active(pcmk_resource_t *rsc)
2807 {
2808  const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2809 
2810  if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2811  rsc->role = pcmk_role_unpromoted;
2812  } else {
2813  rsc->role = pcmk_role_started;
2814  }
2815 }
2816 
2817 static void
2818 set_node_score(gpointer key, gpointer value, gpointer user_data)
2819 {
2820  pcmk_node_t *node = value;
2821  int *score = user_data;
2822 
2823  node->weight = *score;
2824 }
2825 
2826 #define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
2827  "/" XML_CIB_TAG_STATE
2828 #define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \
2829  "/" XML_LRM_TAG_RESOURCES \
2830  "/" XML_LRM_TAG_RESOURCE
2831 #define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP
2832 
2833 static xmlNode *
2834 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2835  int target_rc, pcmk_scheduler_t *scheduler)
2836 {
2837  GString *xpath = NULL;
2838  xmlNode *xml = NULL;
2839 
2840  CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2841  return NULL);
2842 
2843  xpath = g_string_sized_new(256);
2844  pcmk__g_strcat(xpath,
2845  XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']"
2846  SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']"
2847  SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'",
2848  NULL);
2849 
2850  /* Need to check against transition_magic too? */
2851  if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2852  pcmk__g_strcat(xpath,
2853  " and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']",
2854  NULL);
2855 
2856  } else if ((source != NULL)
2857  && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2858  pcmk__g_strcat(xpath,
2859  " and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']",
2860  NULL);
2861  } else {
2862  g_string_append_c(xpath, ']');
2863  }
2864 
2865  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2866  LOG_DEBUG);
2867  g_string_free(xpath, TRUE);
2868 
2869  if (xml && target_rc >= 0) {
2870  int rc = PCMK_OCF_UNKNOWN_ERROR;
2871  int status = PCMK_EXEC_ERROR;
2872 
2873  crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
2874  crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
2875  if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2876  return NULL;
2877  }
2878  }
2879  return xml;
2880 }
2881 
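/* Illustrative note (not from the original source): assuming the usual
 * expansions of the XML name constants above, a call such as
 *
 *     find_lrm_op("db", PCMK_ACTION_MONITOR, "node1", NULL, -1, scheduler);
 *
 * (resource and node names hypothetical) queries the CIB with an XPath
 * roughly like
 *
 *     /cib/status/node_state[@uname='node1']
 *         /lrm/lrm_resources/lrm_resource[@id='db']
 *         /lrm_rsc_op[@operation='monitor']
 *
 * and, because target_rc is negative, skips the rc/op-status check.
 */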
2882 static xmlNode *
2883 find_lrm_resource(const char *rsc_id, const char *node_name,
2884  pcmk_scheduler_t *scheduler)
2885 {
2886  GString *xpath = NULL;
2887  xmlNode *xml = NULL;
2888 
2889  CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2890 
2891  xpath = g_string_sized_new(256);
2892  pcmk__g_strcat(xpath,
2893  XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
2894  SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']",
2895  NULL);
2896 
2897  xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2898  LOG_DEBUG);
2899 
2900  g_string_free(xpath, TRUE);
2901  return xml;
2902 }
2903 
2913 static bool
2914 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
2915 {
2916  bool result = false;
2917  xmlXPathObjectPtr search;
2918  GString *xpath = g_string_sized_new(256);
2919 
2920  pcmk__g_strcat(xpath,
2921  XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
2922  SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']"
2923  SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']",
2924  NULL);
2925  search = xpath_search(rsc->cluster->input, (const char *) xpath->str);
2926  result = (numXpathResults(search) == 0);
2927  freeXpathObject(search);
2928  g_string_free(xpath, TRUE);
2929  return result;
2930 }
2931 
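/* Illustrative note (not from the original source): the rc != 193 predicate
 * above is understood to exclude pending results (193 corresponds to
 * PCMK_OCF_UNKNOWN), so a resource counts as "unknown" on a node only when
 * that node's history contains no completed operation for it.
 */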
2944 static bool
2945 monitor_not_running_after(const char *rsc_id, const char *node_name,
2946  const xmlNode *xml_op, bool same_node,
2947  pcmk_scheduler_t *scheduler)
2948 {
2949  /* Any probe/monitor operation on the node indicating it was not running
2950  * there
2951  */
2952  xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
2953  NULL, PCMK_OCF_NOT_RUNNING, scheduler);
2954 
2955  return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
2956 }
2957 
2970 static bool
2971 non_monitor_after(const char *rsc_id, const char *node_name,
2972  const xmlNode *xml_op, bool same_node,
2973  pcmk_scheduler_t *scheduler)
2974 {
2975  xmlNode *lrm_resource = NULL;
2976 
2977  lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
2978  if (lrm_resource == NULL) {
2979  return false;
2980  }
2981 
2982  for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
2983  op != NULL; op = crm_next_same_xml(op)) {
2984  const char * task = NULL;
2985 
2986  if (op == xml_op) {
2987  continue;
2988  }
2989 
2990  task = crm_element_value(op, XML_LRM_ATTR_TASK);
2991 
2992  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
2993  PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
2994  NULL)
2995  && pe__is_newer_op(op, xml_op, same_node) > 0) {
2996  return true;
2997  }
2998  }
2999 
3000  return false;
3001 }
3002 
3015 static bool
3016 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3017  const xmlNode *migrate_to,
3018  const xmlNode *migrate_from,
3019  pcmk_scheduler_t *scheduler)
3020 {
3021  const xmlNode *xml_op = migrate_to;
3022  const char *source = NULL;
3023  const char *target = NULL;
3024  bool same_node = false;
3025 
3026  if (migrate_from) {
3027  xml_op = migrate_from;
3028  }
3029 
3030  source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
3031  target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3032 
3033  /* It's preferable to compare against the migration event on the same node,
3034  * if one exists, since call IDs are more reliable.
3035  */
3036  if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3037  if (migrate_from) {
3038  xml_op = migrate_from;
3039  same_node = true;
3040 
3041  } else {
3042  xml_op = migrate_to;
3043  }
3044 
3045  } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3046  if (migrate_to) {
3047  xml_op = migrate_to;
3048  same_node = true;
3049 
3050  } else {
3051  xml_op = migrate_from;
3052  }
3053  }
3054 
3055  /* If there's any newer non-monitor operation on the node, or any newer
3056  * probe/monitor operation on the node indicating it was not running there,
3057  * the migration events potentially no longer matter for the node.
3058  */
3059  return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3060  || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3061  scheduler);
3062 }
3063 
3076 static int
3077 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3078  const pcmk_node_t *target_node,
3079  const char **source_name, const char **target_name)
3080 {
3081  *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE);
3082  *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET);
3083  if ((*source_name == NULL) || (*target_name == NULL)) {
3084  crm_err("Ignoring resource history entry %s without "
3085  XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET,
3086  ID(entry));
3087  return pcmk_rc_unpack_error;
3088  }
3089 
3090  if ((source_node != NULL)
3091  && !pcmk__str_eq(*source_name, source_node->details->uname,
3092  pcmk__str_casei|pcmk__str_null_matches)) {
3093  crm_err("Ignoring resource history entry %s because "
3094  XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s",
3095  ID(entry), *source_name, pe__node_name(source_node));
3096  return pcmk_rc_unpack_error;
3097  }
3098 
3099  if ((target_node != NULL)
3100  && !pcmk__str_eq(*target_name, target_node->details->uname,
3101  pcmk__str_casei|pcmk__str_null_matches)) {
3102  crm_err("Ignoring resource history entry %s because "
3103  XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s",
3104  ID(entry), *target_name, pe__node_name(target_node));
3105  return pcmk_rc_unpack_error;
3106  }
3107 
3108  return pcmk_rc_ok;
3109 }
3110 
3111 /*
3112  * \internal
3113  * \brief Add a migration source to a resource's list of dangling migrations
3114  *
3115  * If the migrate_to and migrate_from actions in a live migration both
3116  * succeeded, but there is no stop on the source, the migration is considered
3117  * "dangling." Add the source to the resource's dangling migration list, which
3118  * will be used to schedule a stop on the source without affecting the target.
3119  *
3120  * \param[in,out] rsc Resource involved in migration
3121  * \param[in] node Migration source
3122  */
3123 static void
3124 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3125 {
3126  pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3127  rsc->id, pe__node_name(node));
3128  rsc->role = pcmk_role_stopped;
3129  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3130  (gpointer) node);
3131 }
3132 
3139 static void
3140 unpack_migrate_to_success(struct action_history *history)
3141 {
3142  /* A complete migration sequence is:
3143  * 1. migrate_to on source node (which succeeded if we get to this function)
3144  * 2. migrate_from on target node
3145  * 3. stop on source node
3146  *
3147  * If no migrate_from has happened, the migration is considered to be
3148  * "partial". If the migrate_from succeeded but no stop has happened, the
3149  * migration is considered to be "dangling".
3150  *
3151  * If a successful migrate_to and stop have happened on the source node, we
3152  * still need to check for a partial migration, due to scenarios (easier to
3153  * produce with batch-limit=1) like:
3154  *
3155  * - A resource is migrating from node1 to node2, and a migrate_to is
3156  * initiated for it on node1.
3157  *
3158  * - node2 goes into standby mode while the migrate_to is pending, which
3159  * aborts the transition.
3160  *
3161  * - Upon completion of the migrate_to, a new transition schedules a stop
3162  * on both nodes and a start on node1.
3163  *
3164  * - If the new transition is aborted for any reason while the resource is
3165  * stopping on node1, the transition after that stop completes will see
3166  * the migrate_to and stop on the source, but it's still a partial
3167  * migration, and the resource must be stopped on node2 because it is
3168  * potentially active there due to the migrate_to.
3169  *
3170  * We also need to take into account that either node's history may be
3171  * cleared at any point in the migration process.
3172  */
3173  int from_rc = PCMK_OCF_OK;
3174  int from_status = PCMK_EXEC_PENDING;
3175  pcmk_node_t *target_node = NULL;
3176  xmlNode *migrate_from = NULL;
3177  const char *source = NULL;
3178  const char *target = NULL;
3179  bool source_newer_op = false;
3180  bool target_newer_state = false;
3181  bool active_on_target = false;
3182 
3183  // Get source and target node names from XML
3184  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3185  &target) != pcmk_rc_ok) {
3186  return;
3187  }
3188 
3189  // Check for newer state on the source
3190  source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
3191  true, history->rsc->cluster);
3192 
3193  // Check for a migrate_from action from this source on the target
3194  migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
3195  target, source, -1, history->rsc->cluster);
3196  if (migrate_from != NULL) {
3197  if (source_newer_op) {
3198  /* There's a newer non-monitor operation on the source and a
3199  * migrate_from on the target, so this migrate_to is irrelevant to
3200  * the resource's state.
3201  */
3202  return;
3203  }
3204  crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
3205  crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS,
3206  &from_status);
3207  }
3208 
3209  /* If the resource has newer state on both the source and target after the
3210  * migration events, this migrate_to is irrelevant to the resource's state.
3211  */
3212  target_newer_state = newer_state_after_migrate(history->rsc->id, target,
3213  history->xml, migrate_from,
3214  history->rsc->cluster);
3215  if (source_newer_op && target_newer_state) {
3216  return;
3217  }
3218 
3219  /* Check for dangling migration (migrate_from succeeded but stop not done).
3220  * We know there's no stop because we already returned if the target has a
3221  * migrate_from and the source has any newer non-monitor operation.
3222  */
3223  if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
3224  add_dangling_migration(history->rsc, history->node);
3225  return;
3226  }
3227 
3228  /* Without newer state, this migrate_to implies the resource is active.
3229  * (Clones are not allowed to migrate, so role can't be promoted.)
3230  */
3231  history->rsc->role = pcmk_role_started;
3232 
3233  target_node = pe_find_node(history->rsc->cluster->nodes, target);
3234  active_on_target = !target_newer_state && (target_node != NULL)
3235  && target_node->details->online;
3236 
3237  if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
3238  if (active_on_target) {
3239  native_add_running(history->rsc, target_node, history->rsc->cluster,
3240  TRUE);
3241  } else {
3242  // Mark resource as failed, require recovery, and prevent migration
3243  pe__set_resource_flags(history->rsc,
3244  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3245  pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
3246  }
3247  return;
3248  }
3249 
3250  // The migrate_from is pending, complete but erased, or to be scheduled
3251 
3252  /* If there is no history at all for the resource on an online target, then
3253  * it was likely cleaned. Just return, and we'll schedule a probe. Once we
3254  * have the probe result, it will be reflected in target_newer_state.
3255  */
3256  if ((target_node != NULL) && target_node->details->online
3257  && unknown_on_node(history->rsc, target)) {
3258  return;
3259  }
3260 
3261  if (active_on_target) {
3262  pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
3263  source);
3264 
3265  native_add_running(history->rsc, target_node, history->rsc->cluster,
3266  FALSE);
3267  if ((source_node != NULL) && source_node->details->online) {
3268  /* This is a partial migration: the migrate_to completed
3269  * successfully on the source, but the migrate_from has not
3270  * completed. Remember the source and target; if the newly
3271  * chosen target remains the same when we schedule actions
3272  * later, we may continue with the migration.
3273  */
3274  history->rsc->partial_migration_target = target_node;
3275  history->rsc->partial_migration_source = source_node;
3276  }
3277 
3278  } else if (!source_newer_op) {
3279  // Mark resource as failed, require recovery, and prevent migration
3280  pe__set_resource_flags(history->rsc,
3281  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
3282  pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
3283  }
3284 }
3285 
3292 static void
3293 unpack_migrate_to_failure(struct action_history *history)
3294 {
3295  xmlNode *target_migrate_from = NULL;
3296  const char *source = NULL;
3297  const char *target = NULL;
3298 
3299  // Get source and target node names from XML
3300  if (get_migration_node_names(history->xml, history->node, NULL, &source,
3301  &target) != pcmk_rc_ok) {
3302  return;
3303  }
3304 
3305  /* If a migration failed, we have to assume the resource is active. Clones
3306  * are not allowed to migrate, so role can't be promoted.
3307  */
3308  history->rsc->role = pcmk_role_started;
3309 
3310  // Check for migrate_from on the target
3311  target_migrate_from = find_lrm_op(history->rsc->id,
3312  PCMK_ACTION_MIGRATE_FROM, target, source,
3313  PCMK_OCF_OK, history->rsc->cluster);
3314 
3315  if (/* If the resource state is unknown on the target, it will likely be
3316  * probed there.
3317  * Don't just consider it running there. We will get back here anyway in
3318  * case the probe detects it's running there.
3319  */
3320  !unknown_on_node(history->rsc, target)
3321  /* If the resource has newer state on the target after the migration
3322  * events, this migrate_to no longer matters for the target.
3323  */
3324  && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3325  target_migrate_from,
3326  history->rsc->cluster)) {
3327  /* The resource has no newer state on the target, so assume it's still
3328  * active there
3329  * (if the node is up).
3330  */
3331  pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes,
3332  target);
3333 
3334  if (target_node && target_node->details->online) {
3335  native_add_running(history->rsc, target_node, history->rsc->cluster,
3336  FALSE);
3337  }
3338 
3339  } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3340  history->rsc->cluster)) {
3341  /* We know the resource has newer state on the target, but this
3342  * migrate_to still matters for the source as long as there's no newer
3343  * non-monitor operation there.
3344  */
3345 
3346  // Mark node as having dangling migration so we can force a stop later
3347  history->rsc->dangling_migrations =
3348  g_list_prepend(history->rsc->dangling_migrations,
3349  (gpointer) history->node);
3350  }
3351 }
3352 
3359 static void
3360 unpack_migrate_from_failure(struct action_history *history)
3361 {
3362  xmlNode *source_migrate_to = NULL;
3363  const char *source = NULL;
3364  const char *target = NULL;
3365 
3366  // Get source and target node names from XML
3367  if (get_migration_node_names(history->xml, NULL, history->node, &source,
3368  &target) != pcmk_rc_ok) {
3369  return;
3370  }
3371 
3372  /* If a migration failed, we have to assume the resource is active. Clones
3373  * are not allowed to migrate, so role can't be promoted.
3374  */
3375  history->rsc->role = pcmk_role_started;
3376 
3377  // Check for a migrate_to on the source
3378  source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3379  source, target, PCMK_OCF_OK,
3380  history->rsc->cluster);
3381 
3382  if (/* If the resource state is unknown on the source, it will likely be
3383  * probed there.
3384  * Don't just consider it running there. We will get back here anyway in
3385  * case the probe detects it's running there.
3386  */
3387  !unknown_on_node(history->rsc, source)
3388  /* If the resource has newer state on the source after the migration
3389  * events, this migrate_from no longer matters for the source.
3390  */
3391  && !newer_state_after_migrate(history->rsc->id, source,
3392  source_migrate_to, history->xml,
3393  history->rsc->cluster)) {
3394  /* The resource has no newer state on the source, so assume it's still
3395  * active there (if it is up).
3396  */
3397  pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
3398  source);
3399 
3400  if (source_node && source_node->details->online) {
3401  native_add_running(history->rsc, source_node, history->rsc->cluster,
3402  TRUE);
3403  }
3404  }
3405 }
3406 
3413 static void
3414 record_failed_op(struct action_history *history)
3415 {
3416  if (!(history->node->details->online)) {
3417  return;
3418  }
3419 
3420  for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3421  xIter != NULL; xIter = xIter->next) {
3422 
3423  const char *key = pe__xe_history_key(xIter);
3424  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
3425 
3426  if (pcmk__str_eq(history->key, key, pcmk__str_none)
3427  && pcmk__str_eq(uname, history->node->details->uname,
3428  pcmk__str_casei)) {
3429  crm_trace("Skipping duplicate entry %s on %s",
3430  history->key, pe__node_name(history->node));
3431  return;
3432  }
3433  }
3434 
3435  crm_trace("Adding entry for %s on %s to failed action list",
3436  history->key, pe__node_name(history->node));
3437  crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
3438  crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id);
3439  add_node_copy(history->rsc->cluster->failed, history->xml);
3440 }
3441 
3442 static char *
3443 last_change_str(const xmlNode *xml_op)
3444 {
3445  time_t when;
3446  char *result = NULL;
3447 
3448  if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
3449  &when) == pcmk_ok) {
3450  char *when_s = pcmk__epoch2str(&when, 0);
3451  const char *p = strchr(when_s, ' ');
3452 
3453  // Skip day of week to make message shorter
3454  if ((p != NULL) && (*(++p) != '\0')) {
3455  result = strdup(p);
3456  CRM_ASSERT(result != NULL);
3457  }
3458  free(when_s);
3459  }
3460 
3461  if (result == NULL) {
3462  result = strdup("unknown time");
3463  CRM_ASSERT(result != NULL);
3464  }
3465 
3466  return result;
3467 }
3468 
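/* Illustrative note (not from the original source): assuming
 * pcmk__epoch2str() yields a ctime-like string such as
 * "Wed Feb  1 12:34:56 2023", skipping everything up to the first space
 * makes last_change_str() return "Feb  1 12:34:56 2023", falling back to
 * "unknown time" when the last-change attribute is missing or unparsable.
 */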
3481 static int
3482 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3483 {
3484  switch (first) {
3485  case pcmk_on_fail_demote:
3486  switch (second) {
3487  case pcmk_on_fail_ignore:
3488  return 1;
3489  case pcmk_on_fail_demote:
3490  return 0;
3491  default:
3492  return -1;
3493  }
3494  break;
3495 
3495 
3496  case pcmk_on_fail_reset_remote:
3497  switch (second) {
3498  case pcmk_on_fail_ignore:
3499  case pcmk_on_fail_demote:
3500  case pcmk_on_fail_restart:
3501  return 1;
3502  case pcmk_on_fail_reset_remote:
3503  return 0;
3504  default:
3505  return -1;
3506  }
3507  break;
3508 
3509  case pcmk_on_fail_restart_container:
3510  switch (second) {
3511  case pcmk_on_fail_ignore:
3512  case pcmk_on_fail_demote:
3513  case pcmk_on_fail_restart:
3514  case pcmk_on_fail_reset_remote:
3515  return 1;
3516  case pcmk_on_fail_restart_container:
3517  return 0;
3518  default:
3519  return -1;
3520  }
3521  break;
3522 
3523  default:
3524  break;
3525  }
3526  switch (second) {
3527  case pcmk_on_fail_demote:
3528  return (first == pcmk_on_fail_ignore)? -1 : 1;
3529 
3530  case pcmk_on_fail_reset_remote:
3531  switch (first) {
3532  case pcmk_on_fail_ignore:
3533  case pcmk_on_fail_demote:
3534  case pcmk_on_fail_restart:
3535  return -1;
3536  default:
3537  return 1;
3538  }
3539  break;
3540 
3541  case pcmk_on_fail_restart_container:
3542  switch (first) {
3543  case pcmk_on_fail_ignore:
3544  case pcmk_on_fail_demote:
3545  case pcmk_on_fail_restart:
3546  case pcmk_on_fail_reset_remote:
3547  return -1;
3548  default:
3549  return 1;
3550  }
3551  break;
3552 
3553  default:
3554  break;
3555  }
3556  return first - second;
3557 }
3558 
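/* Illustrative example (not from the original source): cmp_on_fail()
 * imposes a severity ordering on failure responses, for instance
 *
 *     cmp_on_fail(pcmk_on_fail_ignore, pcmk_on_fail_demote) < 0
 *     cmp_on_fail(pcmk_on_fail_demote, pcmk_on_fail_ignore) > 0
 *     cmp_on_fail(pcmk_on_fail_block,  pcmk_on_fail_block) == 0
 *
 * so callers such as unpack_rsc_op_failure() can keep only the most severe
 * response seen so far for a resource.
 */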
3565 static void
3566 ban_from_all_nodes(pcmk_resource_t *rsc)
3567 {
3568  int score = -INFINITY;
3569  pcmk_resource_t *fail_rsc = rsc;
3570 
3571  if (fail_rsc->parent != NULL) {
3572  pcmk_resource_t *parent = uber_parent(fail_rsc);
3573 
3574  if (pe_rsc_is_anon_clone(parent)) {
3575  /* For anonymous clones, if an operation with on-fail=stop fails for
3576  * any instance, the entire clone must stop.
3577  */
3578  fail_rsc = parent;
3579  }
3580  }
3581 
3582  // Ban the resource from all nodes
3583  crm_notice("%s will not be started under current conditions", fail_rsc->id);
3584  if (fail_rsc->allowed_nodes != NULL) {
3585  g_hash_table_destroy(fail_rsc->allowed_nodes);
3586  }
3587  fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3588  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3589 }
3590 
3599 static void
3600 unpack_failure_handling(struct action_history *history,
3601  enum action_fail_response *on_fail,
3602  enum rsc_role_e *fail_role)
3603 {
3604  xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3605  history->interval_ms, true);
3606 
3607  GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3608  history->task,
3609  history->interval_ms, config);
3610 
3611  const char *on_fail_str = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL);
3612 
3613  *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3614  history->interval_ms, on_fail_str);
3615  *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3616  meta);
3617  g_hash_table_destroy(meta);
3618 }
3619 
3630 static void
3631 unpack_rsc_op_failure(struct action_history *history,
3632  enum action_fail_response config_on_fail,
3633  enum rsc_role_e fail_role, xmlNode **last_failure,
3634  enum action_fail_response *on_fail)
3635 {
3636  bool is_probe = false;
3637  char *last_change_s = NULL;
3638 
3639  *last_failure = history->xml;
3640 
3641  is_probe = pcmk_xe_is_probe(history->xml);
3642  last_change_s = last_change_str(history->xml);
3643 
3644  if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
3645  && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3646  crm_trace("Unexpected result (%s%s%s) was recorded for "
3647  "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3648  services_ocf_exitcode_str(history->exit_status),
3649  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3650  pcmk__s(history->exit_reason, ""),
3651  (is_probe? "probe" : history->task), history->rsc->id,
3652  pe__node_name(history->node), last_change_s,
3653  history->exit_status, history->id);
3654  } else {
3655  crm_warn("Unexpected result (%s%s%s) was recorded for "
3656  "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
3657  services_ocf_exitcode_str(history->exit_status),
3658  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3659  pcmk__s(history->exit_reason, ""),
3660  (is_probe? "probe" : history->task), history->rsc->id,
3661  pe__node_name(history->node), last_change_s,
3662  history->exit_status, history->id);
3663 
3664  if (is_probe && (history->exit_status != PCMK_OCF_OK)
3665  && (history->exit_status != PCMK_OCF_NOT_RUNNING)
3666  && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
3667 
3668  /* A failed (not just unexpected) probe result could mean the user
3669  * didn't know resources will be probed even where they can't run.
3670  */
3671  crm_notice("If it is not possible for %s to run on %s, see "
3672  "the resource-discovery option for location constraints",
3673  history->rsc->id, pe__node_name(history->node));
3674  }
3675 
3676  record_failed_op(history);
3677  }
3678 
3679  free(last_change_s);
3680 
3681  if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
3682  pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s",
3683  fail2text(*on_fail), fail2text(config_on_fail),
3684  history->key);
3685  *on_fail = config_on_fail;
3686  }
3687 
3688  if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
3689  resource_location(history->rsc, history->node, -INFINITY,
3690  "__stop_fail__", history->rsc->cluster);
3691 
3692  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
3693  unpack_migrate_to_failure(history);
3694 
3695  } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
3696  unpack_migrate_from_failure(history);
3697 
3698  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
3699  history->rsc->role = pcmk_role_promoted;
3700 
3701  } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
3702  if (config_on_fail == pcmk_on_fail_block) {
3703  history->rsc->role = pcmk_role_promoted;
3704  pe__set_next_role(history->rsc, pcmk_role_stopped,
3705  "demote with on-fail=block");
3706 
3707  } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
3708  history->rsc->role = pcmk_role_stopped;
3709 
3710  } else {
3711  /* Staying in the promoted role would put the scheduler and
3712  * controller into a loop. Setting the role to unpromoted is not
3713  * dangerous because the resource will be stopped as part of
3714  * recovery, and any promotion will be ordered after that stop.
3715  */
3716  history->rsc->role = pcmk_role_unpromoted;
3717  }
3718  }
3719 
3720  if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
3721  /* leave stopped */
3722  pe_rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
3723  history->rsc->role = pcmk_role_stopped;
3724 
3725  } else if (history->rsc->role < pcmk_role_started) {
3726  pe_rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
3727  set_active(history->rsc);
3728  }
3729 
3730  pe_rsc_trace(history->rsc,
3731  "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
3732  history->rsc->id, role2text(history->rsc->role),
3733  pcmk__btoa(history->node->details->unclean),
3734  fail2text(config_on_fail), role2text(fail_role));
3735 
3736  if ((fail_role != pcmk_role_started)
3737  && (history->rsc->next_role < fail_role)) {
3738  pe__set_next_role(history->rsc, fail_role, "failure");
3739  }
3740 
3741  if (fail_role == pcmk_role_stopped) {
3742  ban_from_all_nodes(history->rsc);
3743  }
3744 }
3745 
3755 static void
3756 block_if_unrecoverable(struct action_history *history)
3757 {
3758  char *last_change_s = NULL;
3759 
3760  if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3761  return; // All actions besides stop are always recoverable
3762  }
3763  if (pe_can_fence(history->node->details->data_set, history->node)) {
3764  return; // Failed stops are recoverable via fencing
3765  }
3766 
3767  last_change_s = last_change_str(history->xml);
3768  pe_proc_err("No further recovery can be attempted for %s "
3769  "because %s on %s failed (%s%s%s) at %s "
3770  CRM_XS " rc=%d id=%s",
3771  history->rsc->id, history->task, pe__node_name(history->node),
3772  services_ocf_exitcode_str(history->exit_status),
3773  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3774  pcmk__s(history->exit_reason, ""),
3775  last_change_s, history->exit_status, history->id);
3776 
3777  free(last_change_s);
3778 
3781 }
3782 
3792 static inline void
3793 remap_because(struct action_history *history, const char **why, int value,
3794  const char *reason)
3795 {
3796  if (history->execution_status != value) {
3797  history->execution_status = value;
3798  *why = reason;
3799  }
3800 }
3801 
3824 static void
3825 remap_operation(struct action_history *history,
3826  enum action_fail_response *on_fail, bool expired)
3827 {
3828  bool is_probe = false;
3829  int orig_exit_status = history->exit_status;
3830  int orig_exec_status = history->execution_status;
3831  const char *why = NULL;
3832  const char *task = history->task;
3833 
3834  // Remap degraded results to their successful counterparts
3835  history->exit_status = pcmk__effective_rc(history->exit_status);
3836  if (history->exit_status != orig_exit_status) {
3837  why = "degraded result";
3838  if (!expired && (!history->node->details->shutdown
3839  || history->node->details->online)) {
3840  record_failed_op(history);
3841  }
3842  }
3843 
3844  if (!pe_rsc_is_bundled(history->rsc)
3845  && pcmk_xe_mask_probe_failure(history->xml)
3846  && ((history->execution_status != PCMK_EXEC_DONE)
3847  || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
3848  history->execution_status = PCMK_EXEC_DONE;
3849  history->exit_status = PCMK_OCF_NOT_RUNNING;
3850  why = "equivalent probe result";
3851  }
3852 
3853  /* If the executor reported an execution status of anything but done or
3854  * error, consider that final. But for done or error, we know better whether
3855  * it should be treated as a failure or not, because we know the expected
3856  * result.
3857  */
3858  switch (history->execution_status) {
3859  case PCMK_EXEC_DONE:
3860  case PCMK_EXEC_ERROR:
3861  break;
3862 
3863  // These should be treated as node-fatal
3864  case PCMK_EXEC_NO_FENCE_DEVICE:
3865  case PCMK_EXEC_NO_SECRETS:
3866  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3867  "node-fatal error");
3868  goto remap_done;
3869 
3870  default:
3871  goto remap_done;
3872  }
3873 
3874  is_probe = pcmk_xe_is_probe(history->xml);
3875  if (is_probe) {
3876  task = "probe";
3877  }
3878 
3879  if (history->expected_exit_status < 0) {
3880  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3881  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3882  * expected exit status in the transition key, which (along with the
3883  * similar case of a corrupted transition key in the CIB) will be
3884  * reported to this function as -1. Pacemaker 2.0+ does not support
3885  * rolling upgrades from those versions or processing of saved CIB files
3886  * from those versions, so we do not need to care much about this case.
3887  */
3888  remap_because(history, &why, PCMK_EXEC_ERROR,
3889  "obsolete history format");
3890  crm_warn("Expected result not found for %s on %s "
3891  "(corrupt or obsolete CIB?)",
3892  history->key, pe__node_name(history->node));
3893 
3894  } else if (history->exit_status == history->expected_exit_status) {
3895  remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
3896 
3897  } else {
3898  remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
3899  pe_rsc_debug(history->rsc,
3900  "%s on %s: expected %d (%s), got %d (%s%s%s)",
3901  history->key, pe__node_name(history->node),
3902  history->expected_exit_status,
3903  services_ocf_exitcode_str(history->expected_exit_status),
3904  history->exit_status,
3905  services_ocf_exitcode_str(history->exit_status),
3906  (pcmk__str_empty(history->exit_reason)? "" : ": "),
3907  pcmk__s(history->exit_reason, ""));
3908  }
3909 
3910  switch (history->exit_status) {
3911  case PCMK_OCF_OK:
3912  if (is_probe
3913  && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
3914  char *last_change_s = last_change_str(history->xml);
3915 
3916  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3917  pe_rsc_info(history->rsc, "Probe found %s active on %s at %s",
3918  history->rsc->id, pe__node_name(history->node),
3919  last_change_s);
3920  free(last_change_s);
3921  }
3922  break;
3923 
3924  case PCMK_OCF_NOT_RUNNING:
3925  if (is_probe
3926  || (history->expected_exit_status == history->exit_status)
3927  || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
3928 
3929  /* For probes, recurring monitors for the Stopped role, and
3930  * unmanaged resources, "not running" is not considered a
3931  * failure.
3932  */
3933  remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
3934  history->rsc->role = pcmk_role_stopped;
3935  *on_fail = pcmk_on_fail_ignore;
3936  pe__set_next_role(history->rsc, pcmk_role_unknown,
3937  "not running");
3938  }
3939  break;
3940 
3942  if (is_probe
3943  && (history->exit_status != history->expected_exit_status)) {
3944  char *last_change_s = last_change_str(history->xml);
3945 
3946  remap_because(history, &why, PCMK_EXEC_DONE, "probe");
3947  pe_rsc_info(history->rsc,
3948  "Probe found %s active and promoted on %s at %s",
3949  history->rsc->id, pe__node_name(history->node),
3950  last_change_s);
3951  free(last_change_s);
3952  }
3953  if (!expired
3954  || (history->exit_status == history->expected_exit_status)) {
3955  history->rsc->role = pcmk_role_promoted;
3956  }
3957  break;
3958 
3959  case PCMK_OCF_FAILED_PROMOTED:
3960  if (!expired) {
3961  history->rsc->role = pcmk_role_promoted;
3962  }
3963  remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
3964  break;
3965 
3966  case PCMK_OCF_NOT_CONFIGURED:
3967  remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
3968  break;
3969 
3970  case PCMK_OCF_UNIMPLEMENT_FEATURE:
3971  {
3972  guint interval_ms = 0;
3973  crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS,
3974  &interval_ms);
3975 
3976  if (interval_ms == 0) {
3977  if (!expired) {
3978  block_if_unrecoverable(history);
3979  }
3980  remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
3981  "exit status");
3982  } else {
3983  remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
3984  "exit status");
3985  }
3986  }
3987  break;
3988 
3989  case PCMK_OCF_NOT_INSTALLED:
3990  case PCMK_OCF_INVALID_PARAM:
3991  case PCMK_OCF_INSUFFICIENT_PRIV:
3992  if (!expired) {
3993  block_if_unrecoverable(history);
3994  }
3995  remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
3996  break;
3997 
3998  default:
3999  if (history->execution_status == PCMK_EXEC_DONE) {
4000  char *last_change_s = last_change_str(history->xml);
4001 
4002  crm_info("Treating unknown exit status %d from %s of %s "
4003  "on %s at %s as failure",
4004  history->exit_status, task, history->rsc->id,
4005  pe__node_name(history->node), last_change_s);
4006  remap_because(history, &why, PCMK_EXEC_ERROR,
4007  "unknown exit status");
4008  free(last_change_s);
4009  }
4010  break;
4011  }
4012 
4013 remap_done:
4014  if (why != NULL) {
4015  pe_rsc_trace(history->rsc,
4016  "Remapped %s result from [%s: %s] to [%s: %s] "
4017  "because of %s",
4018  history->key, pcmk_exec_status_str(orig_exec_status),
4019  crm_exit_str(orig_exit_status),
4020  pcmk_exec_status_str(history->execution_status),
4021  crm_exit_str(history->exit_status), why);
4022  }
4023 }
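/* Annotation (not part of unpack.c): minimal sketch of the degraded-result
 * collapse that pcmk__effective_rc() performs above, assuming the OCF exit
 * codes declared by the services API; the real helper is defined in
 * lib/common/agents.c.
 *
 *     static int
 *     effective_rc_sketch(int rc)
 *     {
 *         switch (rc) {
 *             case PCMK_OCF_DEGRADED:          return PCMK_OCF_OK;
 *             case PCMK_OCF_DEGRADED_PROMOTED: return PCMK_OCF_RUNNING_PROMOTED;
 *             default:                         return rc;
 *         }
 *     }
 *
 * The original degraded result is still recorded as a failed operation
 * unless the entry has expired or the node is shutting down while offline.
 */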
4024 
4025 // Return TRUE if this is a start or monitor last failure whose recorded parameters have since changed
4026 static bool
4027 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4028  pcmk_resource_t *rsc, pcmk_node_t *node)
4029 {
4030  if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4031  if (pe__bundle_needs_remote_name(rsc)) {
4032  /* We haven't allocated resources yet, so we can't reliably
4033  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
4034  * When that's needed, defer the check until later.
4035  */
4036  pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4037  rsc->cluster);
4038 
4039  } else {
4040  op_digest_cache_t *digest_data = NULL;
4041 
4042  digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4043  rsc->cluster);
4044  switch (digest_data->rc) {
4045  case pcmk__digest_unknown:
4046  crm_trace("Resource %s history entry %s on %s"
4047  " has no digest to compare",
4048  rsc->id, pe__xe_history_key(xml_op),
4049  node->details->id);
4050  break;
4051  case pcmk__digest_match:
4052  break;
4053  default:
4054  return TRUE;
4055  }
4056  }
4057  }
4058  return FALSE;
4059 }
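/* Annotation (not part of unpack.c): illustrative scenario with hypothetical
 * values. If a start of rsc1 was recorded with a digest computed from
 * ip=192.168.122.10 and the CIB now configures ip=192.168.122.11,
 * rsc_action_digest_cmp() reports a mismatch, so the caller clears the last
 * failure and the resource is re-evaluated with the new parameters; entries
 * with no stored digest, or a matching one, are left alone.
 */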
4060 
4061 // Order action after fencing of remote node, given connection rsc
4062 static void
4063 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4064  pcmk_scheduler_t *scheduler)
4065 {
4066  pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id);
4067 
4068  if (remote_node) {
4069  pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4070  FALSE, scheduler);
4071 
4073  }
4074 }
4075 
4076 static bool
4077 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4078  guint interval_ms, bool is_last_failure)
4079 {
4080  /* Clearing failures of recurring monitors has special concerns. The
4081  * executor reports only changes in the monitor result, so if the
4082  * monitor is still active and still getting the same failure result,
4083  * that will go undetected after the failure is cleared.
4084  *
4085  * Also, the operation history will have the time when the recurring
4086  * monitor result changed to the given code, not the time when the
4087  * result last happened.
4088  *
4089  * @TODO We probably should clear such failures only when the failure
4090  * timeout has passed since the last occurrence of the failed result.
4091  * However we don't record that information. We could maybe approximate
4092  * that by clearing only if there is a more recent successful monitor or
4093  * stop result, but we don't even have that information at this point
4094  * since we are still unpacking the resource's operation history.
4095  *
4096  * This is especially important for remote connection resources with a
4097  * reconnect interval, so in that case, we skip clearing failures
4098  * if the remote node hasn't been fenced.
4099  */
4100  if (rsc->remote_reconnect_ms
4102  && (interval_ms != 0)
4103  && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4104 
4105  pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id);
4106 
4107  if (remote_node && !remote_node->details->remote_was_fenced) {
4108  if (is_last_failure) {
4109  crm_info("Waiting to clear monitor failure for remote node %s"
4110  " until fencing has occurred", rsc->id);
4111  }
4112  return TRUE;
4113  }
4114  }
4115  return FALSE;
4116 }
4117 
4136 static bool
4137 check_operation_expiry(struct action_history *history)
4138 {
4139  bool expired = false;
4140  bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
4141  time_t last_run = 0;
4142  int unexpired_fail_count = 0;
4143  const char *clear_reason = NULL;
4144 
4145  if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
4146  pe_rsc_trace(history->rsc,
4147  "Resource history entry %s on %s is not expired: "
4148  "Not Installed does not expire",
4149  history->id, pe__node_name(history->node));
4150  return false; // "Not installed" must always be cleared manually
4151  }
4152 
4153  if ((history->rsc->failure_timeout > 0)
4154  && (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE,
4155  &last_run) == 0)) {
4156 
4157  // Resource has a failure-timeout, and history entry has a timestamp
4158 
4159  time_t now = get_effective_time(history->rsc->cluster);
4160  time_t last_failure = 0;
4161 
4162  // Is this particular operation history older than the failure timeout?
4163  if ((now >= (last_run + history->rsc->failure_timeout))
4164  && !should_ignore_failure_timeout(history->rsc, history->task,
4165  history->interval_ms,
4166  is_last_failure)) {
4167  expired = true;
4168  }
4169 
4170  // Does the resource as a whole have an unexpired fail count?
4171  unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
4172  &last_failure,
4173  pcmk__fc_effective,
4174  history->xml);
4175 
4176  // Update scheduler recheck time according to *last* failure
4177  crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
4178  " last-failure@%lld",
4179  history->id, (long long) last_run, (expired? "" : "not "),
4180  (long long) now, unexpired_fail_count,
4181  history->rsc->failure_timeout, (long long) last_failure);
4182  last_failure += history->rsc->failure_timeout + 1;
4183  if (unexpired_fail_count && (now < last_failure)) {
4184  pe__update_recheck_time(last_failure, history->rsc->cluster,
4185  "fail count expiration");
4186  }
4187  }
4188 
4189  if (expired) {
4190  if (pe_get_failcount(history->node, history->rsc, NULL,
4191  pcmk__fc_default, history->xml)) {
4192  // There is a fail count ignoring timeout
4193 
4194  if (unexpired_fail_count == 0) {
4195  // There is no fail count considering timeout
4196  clear_reason = "it expired";
4197 
4198  } else {
4199  /* This operation is old, but there is an unexpired fail count.
4200  * In a properly functioning cluster, this should only be
4201  * possible if this operation is not a failure (otherwise the
4202  * fail count should be expired too), so this is really just a
4203  * failsafe.
4204  */
4205  pe_rsc_trace(history->rsc,
4206  "Resource history entry %s on %s is not expired: "
4207  "Unexpired fail count",
4208  history->id, pe__node_name(history->node));
4209  expired = false;
4210  }
4211 
4212  } else if (is_last_failure
4213  && (history->rsc->remote_reconnect_ms != 0)) {
4214  /* Clear any expired last failure when reconnect interval is set,
4215  * even if there is no fail count.
4216  */
4217  clear_reason = "reconnect interval is set";
4218  }
4219  }
4220 
4221  if (!expired && is_last_failure
4222  && should_clear_for_param_change(history->xml, history->task,
4223  history->rsc, history->node)) {
4224  clear_reason = "resource parameters have changed";
4225  }
4226 
4227  if (clear_reason != NULL) {
4228  pcmk_action_t *clear_op = NULL;
4229 
4230  // Schedule clearing of the fail count
4231  clear_op = pe__clear_failcount(history->rsc, history->node,
4232  clear_reason, history->rsc->cluster);
4233 
4234  if (pcmk_is_set(history->rsc->cluster->flags,
4235  pcmk_sched_fencing_enabled)
4236  && (history->rsc->remote_reconnect_ms != 0)) {
4237  /* If we're clearing a remote connection due to a reconnect
4238  * interval, we want to wait until any scheduled fencing
4239  * completes.
4240  *
4241  * We could limit this to remote_node->details->unclean, but at
4242  * this point, that's always true (it won't be reliable until
4243  * after unpack_node_history() is done).
4244  */
4245  crm_info("Clearing %s failure will wait until any scheduled "
4246  "fencing of %s completes",
4247  history->task, history->rsc->id);
4248  order_after_remote_fencing(clear_op, history->rsc,
4249  history->rsc->cluster);
4250  }
4251  }
4252 
4253  if (expired && (history->interval_ms == 0)
4254  && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
4255  switch (history->exit_status) {
4256  case PCMK_OCF_OK:
4257  case PCMK_OCF_NOT_RUNNING:
4258  case PCMK_OCF_RUNNING_PROMOTED:
4259  case PCMK_OCF_DEGRADED:
4260  case PCMK_OCF_DEGRADED_PROMOTED:
4261  // Don't expire probes that return these values
4262  pe_rsc_trace(history->rsc,
4263  "Resource history entry %s on %s is not expired: "
4264  "Probe result",
4265  history->id, pe__node_name(history->node));
4266  expired = false;
4267  break;
4268  }
4269  }
4270 
4271  return expired;
4272 }
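/* Annotation (not part of unpack.c): worked example with hypothetical values.
 * With failure-timeout=60s and a failed entry whose last-rc-change is t=1000,
 * the entry counts as expired once the effective cluster time reaches t=1060,
 * unless should_ignore_failure_timeout() applies or the resource still has an
 * unexpired fail count. When an unexpired fail count remains, the cluster
 * recheck time is pushed to last_failure + failure_timeout + 1 (t=1061 here)
 * so a new transition runs once the fail count itself expires.
 */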
4273 
4274 int
4275 pe__target_rc_from_xml(const xmlNode *xml_op)
4276 {
4277  int target_rc = 0;
4278  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
4279 
4280  if (key == NULL) {
4281  return -1;
4282  }
4283  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4284  return target_rc;
4285 }
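/* Annotation (not part of unpack.c): usage sketch with a hypothetical entry.
 * A transition key has the form "<action>:<transition>:<target_rc>:<uuid>",
 * e.g. "3:15:7:0f7f8831-...", so for such an entry:
 *
 *     int target_rc = pe__target_rc_from_xml(xml_op);   // 7 (not running)
 *
 * A missing or corrupt XML_ATTR_TRANSITION_KEY is reported as -1, which
 * remap_operation() above treats as an obsolete or corrupt history format.
 */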
4286 
4296 static void
4297 update_resource_state(struct action_history *history, int exit_status,
4298  const xmlNode *last_failure,
4299  enum action_fail_response *on_fail)
4300 {
4301  bool clear_past_failure = false;
4302 
4303  if ((exit_status == PCMK_OCF_NOT_INSTALLED)
4304  || (!pe_rsc_is_bundled(history->rsc)
4305  && pcmk_xe_mask_probe_failure(history->xml))) {
4306  history->rsc->role = pcmk_role_stopped;
4307 
4308  } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
4309  clear_past_failure = true;
4310 
4311  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
4312  pcmk__str_none)) {
4313  if ((last_failure != NULL)
4314  && pcmk__str_eq(history->key, pe__xe_history_key(last_failure),
4315  pcmk__str_none)) {
4316  clear_past_failure = true;
4317  }
4318  if (history->rsc->role < pcmk_role_started) {
4319  set_active(history->rsc);
4320  }
4321 
4322  } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
4323  history->rsc->role = pcmk_role_started;
4324  clear_past_failure = true;
4325 
4326  } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
4327  history->rsc->role = pcmk_role_stopped;
4328  clear_past_failure = true;
4329 
4330  } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
4331  pcmk__str_none)) {
4332  history->rsc->role = pcmk_role_promoted;
4333  clear_past_failure = true;
4334 
4335  } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
4336  pcmk__str_none)) {
4337  if (*on_fail == pcmk_on_fail_demote) {
4338  // Demote clears an error only if on-fail=demote
4339  clear_past_failure = true;
4340  }
4341  history->rsc->role = pcmk_role_unpromoted;
4342 
4343  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
4344  pcmk__str_none)) {
4345  history->rsc->role = pcmk_role_started;
4346  clear_past_failure = true;
4347 
4348  } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
4349  pcmk__str_none)) {
4350  unpack_migrate_to_success(history);
4351 
4352  } else if (history->rsc->role < pcmk_role_started) {
4353  pe_rsc_trace(history->rsc, "%s active on %s",
4354  history->rsc->id, pe__node_name(history->node));
4355  set_active(history->rsc);
4356  }
4357 
4358  if (!clear_past_failure) {
4359  return;
4360  }
4361 
4362  switch (*on_fail) {
4363  case pcmk_on_fail_stop:
4364  case pcmk_on_fail_ban:
4365  case pcmk_on_fail_standby_node:
4366  case pcmk_on_fail_fence_node:
4367  pe_rsc_trace(history->rsc,
4368  "%s (%s) is not cleared by a completed %s",
4369  history->rsc->id, fail2text(*on_fail), history->task);
4370  break;
4371 
4372  case pcmk_on_fail_block:
4373  case pcmk_on_fail_ignore:
4374  case pcmk_on_fail_demote:
4375  case pcmk_on_fail_restart:
4376  case pcmk_on_fail_restart_container:
4377  *on_fail = pcmk_on_fail_ignore;
4378  pe__set_next_role(history->rsc, pcmk_role_unknown,
4379  "clear past failures");
4380  break;
4381 
4382  case pcmk_on_fail_reset_remote:
4383  if (history->rsc->remote_reconnect_ms == 0) {
4384  /* With no reconnect interval, the connection is allowed to
4385  * start again after the remote node is fenced and
4386  * completely stopped. (With a reconnect interval, we wait
4387  * for the failure to be cleared entirely before attempting
4388  * to reconnect.)
4389  */
4390  *on_fail = pcmk_on_fail_ignore;
4391  pe__set_next_role(history->rsc, pcmk_role_unknown,
4392  "clear past failures and reset remote");
4393  }
4394  break;
4395  }
4396 }
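/* Annotation (not part of unpack.c): summary of the role updates applied by
 * update_resource_state() for a completed action, as coded above:
 *   start, migrate_from  -> started     (clears past failures)
 *   stop                 -> stopped     (clears past failures)
 *   promote              -> promoted    (clears past failures)
 *   demote               -> unpromoted  (clears only when on-fail=demote)
 *   migrate_to           -> delegated to unpack_migrate_to_success()
 *   monitor              -> marks the resource active; clears only a
 *                           matching last failure
 * The final switch then resets *on_fail to "ignore" only for failure
 * responses that a successful result is allowed to cancel.
 */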
4397 
4406 static inline bool
4407 can_affect_state(struct action_history *history)
4408 {
4409 #if 0
4410  /* @COMPAT It might be better to parse only actions we know we're interested
4411  * in, rather than exclude a couple we don't. However that would be a
4412  * behavioral change that should be done at a major or minor series release.
4413  * Currently, unknown operations can affect whether a resource is considered
4414  * active and/or failed.
4415  */
4416  return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4420  "asyncmon", NULL);
4421 #else
4422  return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4423  PCMK_ACTION_META_DATA, NULL);
4424 #endif
4425 }
4426 
4435 static int
4436 unpack_action_result(struct action_history *history)
4437 {
4438  if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS,
4439  &(history->execution_status)) < 0)
4440  || (history->execution_status < PCMK_EXEC_PENDING)
4441  || (history->execution_status > PCMK_EXEC_MAX)
4442  || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4443  crm_err("Ignoring resource history entry %s for %s on %s "
4444  "with invalid " XML_LRM_ATTR_OPSTATUS " '%s'",
4445  history->id, history->rsc->id, pe__node_name(history->node),
4446  pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_OPSTATUS),
4447  ""));
4448  return pcmk_rc_unpack_error;
4449  }
4450  if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC,
4451  &(history->exit_status)) < 0)
4452  || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4453 #if 0
4454  /* @COMPAT We should ignore malformed entries, but since that would
4455  * change behavior, it should be done at a major or minor series
4456  * release.
4457  */
4458  crm_err("Ignoring resource history entry %s for %s on %s "
4459  "with invalid " XML_LRM_ATTR_RC " '%s'",
4460  history->id, history->rsc->id, pe__node_name(history->node),
4461  pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_RC),
4462  ""));
4463  return pcmk_rc_unpack_error;
4464 #else
4465  history->exit_status = CRM_EX_ERROR;
4466 #endif
4467  }
4468  history->exit_reason = crm_element_value(history->xml,
4469  XML_LRM_ATTR_EXIT_REASON);
4470  return pcmk_rc_ok;
4471 }
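/* Annotation (not part of unpack.c): hypothetical history entry showing the
 * attributes parsed above (other attributes omitted):
 *
 *   <lrm_rsc_op id="rsc1_last_0" operation="monitor" call-id="22"
 *               op-status="0" rc-code="7" exit-reason=""
 *               transition-key="3:15:7:..."/>
 *
 * op-status (XML_LRM_ATTR_OPSTATUS) becomes execution_status, here 0 for
 * PCMK_EXEC_DONE, and rc-code (XML_LRM_ATTR_RC) becomes exit_status, here 7
 * for PCMK_OCF_NOT_RUNNING; an out-of-range rc-code is coerced to
 * CRM_EX_ERROR rather than rejected, per the @COMPAT note above.
 */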
4472 
4483 static int
4484 process_expired_result(struct action_history *history, int orig_exit_status)
4485 {
4486  if (!pe_rsc_is_bundled(history->rsc)
4487  && pcmk_xe_mask_probe_failure(history->xml)
4488  && (orig_exit_status != history->expected_exit_status)) {
4489 
4490  if (history->rsc->role <= pcmk_role_stopped) {
4491  history->rsc->role = pcmk_role_unknown;
4492  }
4493  crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4494  "Masked failure expired",
4495  history->id, history->rsc->id,
4496  pe__node_name(history->node));
4497  return pcmk_rc_ok;
4498  }
4499 
4500  if (history->exit_status == history->expected_exit_status) {
4501  return pcmk_rc_undetermined; // Only failures expire
4502  }
4503 
4504  if (history->interval_ms == 0) {
4505  crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4506  "Expired failure",
4507  history->id, history->task, history->rsc->id,
4508  pe__node_name(history->node));
4509  return pcmk_rc_ok;
4510  }
4511 
4512  if (history->node->details->online && !history->node->details->unclean) {
4513  /* Reschedule the recurring action. schedule_cancel() won't work at
4514  * this stage, so as a hacky workaround, forcibly change the restart
4515  * digest so pcmk__check_action_config() does what we want later.
4516  *
4517  * @TODO We should skip this if there is a newer successful monitor.
4518  * Also, this causes rescheduling only if the history entry
4519  * has an op-digest (which the expire-non-blocked-failure
4520  * scheduler regression test doesn't, but that may not be a
4521  * realistic scenario in production).
4522  */
4523  crm_notice("Rescheduling %s-interval %s of %s on %s "
4524  "after failure expired",
4525  pcmk__readable_interval(history->interval_ms), history->task,
4526  history->rsc->id, pe__node_name(history->node));
4527  crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST,
4528  "calculated-failure-timeout");
4529  return pcmk_rc_ok;
4530  }
4531 
4532  return pcmk_rc_undetermined;
4533 }
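/* Annotation (not part of unpack.c): the forced restart-digest change above
 * means an expired failure of a recurring monitor is not simply dropped: the
 * stored XML_LRM_ATTR_RESTART_DIGEST no longer matches the recalculated one,
 * so pcmk__check_action_config() later reschedules that monitor as if its
 * configuration had changed (for example, a failed 10s-interval monitor is
 * rescheduled on the same node once its failure-timeout has passed).
 */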
4534 
4544 static void
4545 mask_probe_failure(struct action_history *history, int orig_exit_status,
4546  const xmlNode *last_failure,
4547  enum action_fail_response *on_fail)
4548 {
4549  pcmk_resource_t *ban_rsc = history->rsc;
4550 
4551  if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4552  ban_rsc = uber_parent(history->rsc);
4553  }
4554 
4555  crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4556  services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4557  pe__node_name(history->node));
4558  update_resource_state(history, history->expected_exit_status, last_failure,
4559  on_fail);
4560  crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
4561 
4562  record_failed_op(history);
4563  resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure",
4564  history->rsc->cluster);
4565 }
4566 
4578 static bool
4579 failure_is_newer(const struct action_history *history,
4580  const xmlNode *last_failure)
4581 {
4582  guint failure_interval_ms = 0U;
4583  long long failure_change = 0LL;
4584  long long this_change = 0LL;
4585 
4586  if (last_failure == NULL) {
4587  return false; // Resource has no last_failure entry
4588  }
4589 
4590  if (!pcmk__str_eq(history->task,
4591  crm_element_value(last_failure, XML_LRM_ATTR_TASK),
4592  pcmk__str_none)) {
4593  return false; // last_failure is for different action
4594  }
4595 
4596  if ((crm_element_value_ms(last_failure, XML_LRM_ATTR_INTERVAL_MS,
4597  &failure_interval_ms) != pcmk_ok)
4598  || (history->interval_ms != failure_interval_ms)) {
4599  return false; // last_failure is for action with different interval
4600  }
4601 
4602  if ((pcmk__scan_ll(crm_element_value(history->xml, XML_RSC_OP_LAST_CHANGE),
4603  &this_change, 0LL) != pcmk_rc_ok)
4604  || (pcmk__scan_ll(crm_element_value(last_failure,
4605  XML_RSC_OP_LAST_CHANGE),
4606  &failure_change, 0LL) != pcmk_rc_ok)
4607  || (failure_change < this_change)) {
4608  return false; // Failure is not known to be newer
4609  }
4610 
4611  return true;
4612 }
4613 
4621 static void
4622 process_pending_action(struct action_history *history,
4623  const xmlNode *last_failure)
4624 {
4625  /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
4626  * and there might be a RSC_monitor_INTERVAL entry with the last successful
4627  * or pending result.
4628  *
4629  * If last_failure contains the failure of the pending recurring monitor
4630  * we're processing here, and is newer, the action is no longer pending.
4631  * (Pending results have call ID -1, which sorts last, so the last failure
4632  * if any should be known.)
4633  */
4634  if (failure_is_newer(history, last_failure)) {
4635  return;
4636  }
4637 
4638  if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4639  pe__set_resource_flags(history->rsc, pcmk_rsc_start_pending);
4640  set_active(history->rsc);
4641 
4642  } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4643  history->rsc->role = pcmk_role_promoted;
4644 
4645  } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4646  && history->node->details->unclean) {
4647  /* A migrate_to action is pending on an unclean source, so force a stop
4648  * on the target.
4649  */
4650  const char *migrate_target = NULL;
4651  pcmk_node_t *target = NULL;
4652 
4653  migrate_target = crm_element_value(history->xml,
4654  XML_LRM_ATTR_MIGRATE_TARGET);
4655  target = pe_find_node(history->rsc->cluster->nodes, migrate_target);
4656  if (target != NULL) {
4657  stop_action(history->rsc, target, FALSE);
4658  }
4659  }
4660 
4661  if (history->rsc->pending_task != NULL) {
4662  /* There should never be multiple pending actions, but as a failsafe,
4663  * just remember the first one processed for display purposes.
4664  */
4665  return;
4666  }
4667 
4668  if (pcmk_is_probe(history->task, history->interval_ms)) {
4669  /* Pending probes are currently never displayed, even if pending
4670  * operations are requested. If we ever want to change that,
4671  * enable the below and the corresponding part of
4672  * native.c:native_pending_task().
4673  */
4674 #if 0
4675  history->rsc->pending_task = strdup("probe");
4676  history->rsc->pending_node = history->node;
4677 #endif
4678  } else {
4679  history->rsc->pending_task = strdup(history->task);
4680  history->rsc->pending_node = history->node;
4681  }
4682 }
4683 
4684 static void
4685 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4686  xmlNode **last_failure, enum action_fail_response *on_fail)
4687 {
4688  int old_rc = 0;
4689  bool expired = false;
4690  pcmk_resource_t *parent = rsc;
4691  enum rsc_role_e fail_role = pcmk_role_unknown;
4692  enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4693 
4694  struct action_history history = {
4695  .rsc = rsc,
4696  .node = node,
4697  .xml = xml_op,
4698  .execution_status = PCMK_EXEC_UNKNOWN,
4699  };
4700 
4701  CRM_CHECK(rsc && node && xml_op, return);
4702 
4703  history.id = ID(xml_op);
4704  if (history.id == NULL) {
4705  crm_err("Ignoring resource history entry for %s on %s without ID",
4706  rsc->id, pe__node_name(node));
4707  return;
4708  }
4709 
4710  // Task and interval
4711  history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
4712  if (history.task == NULL) {
4713  crm_err("Ignoring resource history entry %s for %s on %s without "
4714  XML_LRM_ATTR_TASK, history.id, rsc->id, pe__node_name(node));
4715  return;
4716  }
4717  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS,
4718  &(history.interval_ms));
4719  if (!can_affect_state(&history)) {
4720  pe_rsc_trace(rsc,
4721  "Ignoring resource history entry %s for %s on %s "
4722  "with irrelevant action '%s'",
4723  history.id, rsc->id, pe__node_name(node), history.task);
4724  return;
4725  }
4726 
4727  if (unpack_action_result(&history) != pcmk_rc_ok) {
4728  return; // Error already logged
4729  }
4730 
4731  history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4732  history.key = pe__xe_history_key(xml_op);
4733  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id));
4734 
4735  pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4736  history.id, history.task, history.call_id, pe__node_name(node),
4737  pcmk_exec_status_str(history.execution_status),
4738  crm_exit_str(history.exit_status));
4739 
4740  if (node->details->unclean) {
4741  pe_rsc_trace(rsc,
4742  "%s is running on %s, which is unclean (further action "
4743  "depends on value of stop's on-fail attribute)",
4744  rsc->id, pe__node_name(node));
4745  }
4746 
4747  expired = check_operation_expiry(&history);
4748  old_rc = history.exit_status;
4749 
4750  remap_operation(&history, on_fail, expired);
4751 
4752  if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4753  goto done;
4754  }
4755 
4756  if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4757  mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4758  goto done;
4759  }
4760 
4761  if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4762  parent = uber_parent(rsc);
4763  }
4764 
4765  switch (history.execution_status) {
4766  case PCMK_EXEC_PENDING:
4767  process_pending_action(&history, *last_failure);
4768  goto done;
4769 
4770  case PCMK_EXEC_DONE:
4771  update_resource_state(&history, history.exit_status, *last_failure,
4772  on_fail);
4773  goto done;
4774 
4775  case PCMK_EXEC_NOT_INSTALLED:
4776  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4777  if (failure_strategy == pcmk_on_fail_ignore) {
4778  crm_warn("Cannot ignore failed %s of %s on %s: "
4779  "Resource agent doesn't exist "
4780  CRM_XS " status=%d rc=%d id=%s",
4781  history.task, rsc->id, pe__node_name(node),
4782  history.execution_status, history.exit_status,
4783  history.id);
4784  /* Also for printing it as "FAILED" by marking it as
4785  * pcmk_rsc_failed later
4786  */
4787  *on_fail = pcmk_on_fail_ban;
4788  }
4789  resource_location(parent, node, -INFINITY, "hard-error",
4790  rsc->cluster);
4791  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4792  last_failure, on_fail);
4793  goto done;
4794 
4795  case PCMK_EXEC_NOT_CONNECTED:
4796  if (pe__is_guest_or_remote_node(node)
4797  && pcmk_is_set(node->details->remote_rsc->flags,
4798  pcmk_rsc_managed)) {
4799  /* We should never get into a situation where a managed remote
4800  * connection resource is considered OK but a resource action
4801  * behind the connection gets a "not connected" status. But as a
4802  * fail-safe in case a bug or unusual circumstances do lead to
4803  * that, ensure the remote connection is considered failed.
4804  */
4805  pe__set_resource_flags(node->details->remote_rsc,
4806  pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
4807  }
4808  break; // Not done, do error handling
4809 
4810  case PCMK_EXEC_ERROR:
4811  case PCMK_EXEC_ERROR_HARD:
4812  case PCMK_EXEC_ERROR_FATAL:
4813  case PCMK_EXEC_TIMEOUT:
4814  case PCMK_EXEC_NOT_SUPPORTED:
4815  case PCMK_EXEC_INVALID:
4816  break; // Not done, do error handling
4817 
4818  default: // No other value should be possible at this point
4819  break;
4820  }
4821 
4822  unpack_failure_handling(&history, &failure_strategy, &fail_role);
4823  if ((failure_strategy == pcmk_on_fail_ignore)
4824  || ((failure_strategy == pcmk_on_fail_restart_container)
4825  && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4826 
4827  char *last_change_s = last_change_str(xml_op);
4828 
4829  crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4830  CRM_XS " %s",
4831  history.task, services_ocf_exitcode_str(history.exit_status),
4832  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4833  pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node),
4834  last_change_s, history.id);
4835  free(last_change_s);
4836 
4837  update_resource_state(&history, history.expected_exit_status,
4838  *last_failure, on_fail);
4839  crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
4840  pe__set_resource_flags(rsc, pcmk_rsc_ignore_failure);
4841 
4842  record_failed_op(&history);
4843 
4844  if ((failure_strategy == pcmk_on_fail_restart_container)
4845  && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4846  *on_fail = failure_strategy;
4847  }
4848 
4849  } else {
4850  unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4851  last_failure, on_fail);
4852 
4853  if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4854  uint8_t log_level = LOG_ERR;
4855 
4856  if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4857  log_level = LOG_NOTICE;
4858  }
4859  do_crm_log(log_level,
4860  "Preventing %s from restarting on %s because "
4861  "of hard failure (%s%s%s) " CRM_XS " %s",
4862  parent->id, pe__node_name(node),
4863  services_ocf_exitcode_str(history.exit_status),
4864  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4865  pcmk__s(history.exit_reason, ""), history.id);
4866  resource_location(parent, node, -INFINITY, "hard-error",
4867  rsc->cluster);
4868 
4869  } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4870  crm_err("Preventing %s from restarting anywhere because "
4871  "of fatal failure (%s%s%s) " CRM_XS " %s",
4872  parent->id, services_ocf_exitcode_str(history.exit_status),
4873  (pcmk__str_empty(history.exit_reason)? "" : ": "),
4874  pcmk__s(history.exit_reason, ""), history.id);
4875  resource_location(parent, NULL, -INFINITY, "fatal-error",
4876  rsc->cluster);
4877  }
4878  }
4879 
4880 done:
4881  pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4882  rsc->id, pe__node_name(node), history.id,
4883  role2text(rsc->role), role2text(rsc->next_role));
4884 }
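/* Annotation (not part of unpack.c): processing order in unpack_rsc_op():
 *   1. validate the entry (ID, task) and skip actions that cannot affect
 *      resource state (can_affect_state)
 *   2. parse the raw result (unpack_action_result) and the expected rc
 *   3. expire old failures (check_operation_expiry, process_expired_result)
 *   4. remap the executor result (remap_operation) and mask probe failures
 *   5. branch on the remapped execution status: pending, done, or one of
 *      the error paths, applying the configured on-fail handling/fail role
 */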
4885 
4886 static void
4887 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4888  pcmk_scheduler_t *scheduler)
4889 {
4890  const char *cluster_name = NULL;
4891 
4892  pe_rule_eval_data_t rule_data = {
4893  .node_hash = NULL,
4894  .role = pcmk_role_unknown,
4895  .now = scheduler->now,
4896  .match_data = NULL,
4897  .rsc_data = NULL,
4898  .op_data = NULL
4899  };
4900 
4901  g_hash_table_insert(node->details->attrs,
4902  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
4903 
4904  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
4905  strdup(node->details->id));
4906  if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
4907  scheduler->dc_node = node;
4908  node->details->is_dc = TRUE;
4909  g_hash_table_insert(node->details->attrs,
4910  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
4911  } else {
4912  g_hash_table_insert(node->details->attrs,
4913  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
4914  }
4915 
4916  cluster_name = g_hash_table_lookup(scheduler->config_hash, "cluster-name");
4917  if (cluster_name) {
4918  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
4919  strdup(cluster_name));
4920  }
4921 
4922  pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data,
4923  node->details->attrs, NULL, overwrite,
4924  scheduler);
4925 
4926  pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data,
4927  node->details->utilization, NULL,
4928  FALSE, scheduler);
4929 
4930  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
4931  const char *site_name = pe_node_attribute_raw(node, "site-name");
4932 
4933  if (site_name) {
4934  g_hash_table_insert(node->details->attrs,
4935  strdup(CRM_ATTR_SITE_NAME),
4936  strdup(site_name));
4937 
4938  } else if (cluster_name) {
4939  /* Default to cluster-name if unset */
4940  g_hash_table_insert(node->details->attrs,
4941  strdup(CRM_ATTR_SITE_NAME),
4942  strdup(cluster_name));
4943  }
4944  }
4945 }
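/* Annotation (not part of unpack.c): hypothetical <node> entry consumed above:
 *
 *   <node id="1" uname="node1">
 *     <instance_attributes id="nodes-1">
 *       <nvpair id="nodes-1-site-name" name="site-name" value="siteA"/>
 *     </instance_attributes>
 *   </node>
 *
 * The built-in #uname, #id, #is_dc and #cluster-name attributes are inserted
 * first, then instance_attributes and utilization sets are unpacked, and
 * #site-name falls back to the cluster name when not set explicitly.
 */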
4946 
4947 static GList *
4948 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
4949 {
4950  int counter = -1;
4951  int stop_index = -1;
4952  int start_index = -1;
4953 
4954  xmlNode *rsc_op = NULL;
4955 
4956  GList *gIter = NULL;
4957  GList *op_list = NULL;
4958  GList *sorted_op_list = NULL;
4959 
4960  /* extract operations */
4961  op_list = NULL;
4962  sorted_op_list = NULL;
4963 
4964  for (rsc_op = pcmk__xe_first_child(rsc_entry);
4965  rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
4966 
4967  if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP,
4968  pcmk__str_none)) {
4969  crm_xml_add(rsc_op, "resource", rsc);
4970  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
4971  op_list = g_list_prepend(op_list, rsc_op);
4972  }
4973  }
4974 
4975  if (op_list == NULL) {
4976  /* if there are no operations, there is nothing to do */
4977  return NULL;
4978  }
4979 
4980  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
4981 
4982  /* create active recurring operations as optional */
4983  if (active_filter == FALSE) {
4984  return sorted_op_list;
4985  }
4986 
4987  op_list = NULL;
4988 
4989  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
4990 
4991  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
4992  xmlNode *rsc_op = (xmlNode *) gIter->data;
4993 
4994  counter++;
4995 
4996  if (start_index < stop_index) {
4997  crm_trace("Skipping %s: not active", ID(rsc_entry));
4998  break;
4999 
5000  } else if (counter < start_index) {
5001  crm_trace("Skipping %s: old", ID(rsc_op));
5002  continue;
5003  }
5004  op_list = g_list_append(op_list, rsc_op);
5005  }
5006 
5007  g_list_free(sorted_op_list);
5008  return op_list;
5009 }
5010 
5011 GList *
5012 find_operations(const char *rsc, const char *node, gboolean active_filter,
5013  pcmk_scheduler_t *scheduler)
5014 {
5015  GList *output = NULL;
5016  GList *intermediate = NULL;
5017 
5018  xmlNode *tmp = NULL;
5019  xmlNode *status = find_xml_node(scheduler->input, XML_CIB_TAG_STATUS, TRUE);
5020 
5021  pcmk_node_t *this_node = NULL;
5022 
5023  xmlNode *node_state = NULL;
5024 
5025  for (node_state = pcmk__xe_first_child(status); node_state != NULL;
5026  node_state = pcmk__xe_next(node_state)) {
5027 
5028  if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
5029  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
5030 
5031  if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5032  continue;
5033  }
5034 
5035  this_node = pe_find_node(scheduler->nodes, uname);
5036  if(this_node == NULL) {
5037  CRM_LOG_ASSERT(this_node != NULL);
5038  continue;
5039 
5040  } else if (pe__is_guest_or_remote_node(this_node)) {
5041  determine_remote_online_status(scheduler, this_node);
5042 
5043  } else {
5044  determine_online_status(node_state, this_node, scheduler);
5045  }
5046 
5047  if (this_node->details->online
5048  || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
5049  /* offline nodes run no resources...
5050  * unless stonith is enabled in which case we need to
5051  * make sure rsc start events happen after the stonith
5052  */
5053  xmlNode *lrm_rsc = NULL;
5054 
5055  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
5056  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
5057 
5058  for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL;
5059  lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5060 
5061  if (pcmk__str_eq((const char *)lrm_rsc->name,
5062  XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
5063 
5064  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
5065 
5066  if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5067  continue;
5068  }
5069 
5070  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5071  output = g_list_concat(output, intermediate);
5072  }
5073  }
5074  }
5075  }
5076  }
5077 
5078  return output;
5079 }
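/* Annotation (not part of unpack.c): minimal caller sketch (hypothetical
 * names; error handling omitted):
 *
 *     GList *ops = find_operations("rsc1", "node1", TRUE, scheduler);
 *
 *     for (GList *iter = ops; iter != NULL; iter = iter->next) {
 *         xmlNode *op = iter->data;
 *         crm_debug("Found %s on %s", ID(op),
 *                   crm_element_value(op, XML_ATTR_UNAME));
 *     }
 *     g_list_free(ops);
 *
 * Only history from online nodes is considered (or from any node when
 * fencing is enabled), and with active_filter each resource's list is
 * trimmed to the operations since its last stop via calculate_active_ops().
 */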
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:962
GHashTable * tags
Configuration tags (ID -> pcmk_tag_t *)
Definition: scheduler.h:218
Services API.
pcmk__cpg_host_t host
Definition: cpg.c:49
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:238
bool pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
Definition: utils.c:36
#define XML_RSC_OP_LAST_CHANGE
Definition: msg_xml.h:326
void verify_pe_options(GHashTable *options)
Definition: common.c:308
enum pe_quorum_policy no_quorum_policy
Response to loss of quorum.
Definition: scheduler.h:186
bool pe__shutdown_requested(const pcmk_node_t *node)
Definition: utils.c:666
A dumping ground.
pcmk_ticket_t * ticket_new(const char *ticket_id, pcmk_scheduler_t *scheduler)
Definition: utils.c:510
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition: utils.c:89
Service failed and possibly in promoted role.
Definition: results.h:179
Whether resource has been removed but has a container.
Definition: resources.h:112
#define crm_notice(fmt, args...)
Definition: logging.h:383
GHashTable * known_on
Nodes where resource has been probed (key is node ID, not name)
Definition: resources.h:463
bool pe__is_guest_or_remote_node(const pcmk_node_t *node)
Definition: remote.c:41
No connection to executor.
Definition: results.h:326
pcmk_scheduler_t * cluster
Cluster that resource is part of.
Definition: resources.h:412
gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
Definition: unpack.c:597
GHashTable * attrs
Node attributes.
Definition: nodes.h:115
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:36
gboolean unseen
Whether node has never joined cluster.
Definition: nodes.h:77
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK
Definition: msg_xml.h:403
gboolean fixed
Definition: nodes.h:132
char data[0]
Definition: cpg.c:55
#define INFINITY
Definition: crm.h:98
Whether action should not be executed.
Definition: actions.h:244
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
List nodes where a resource (or any of its children) is.
Definition: resources.h:339
Service active and promoted.
Definition: results.h:178
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:76
#define CRM_ATTR_KIND
Definition: crm.h:115
bool pe__is_universal_clone(const pcmk_resource_t *rsc, const pcmk_scheduler_t *scheduler)
Definition: clone.c:1240
pcmk_node_t * partial_migration_target
The destination node, if migrate_to completed but migrate_from has not.
Definition: resources.h:454
#define XML_NODE_IS_FENCED
Definition: msg_xml.h:289
const char * pe_node_attribute_raw(const pcmk_node_t *node, const char *name)
Definition: common.c:621
pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
Definition: bundle.c:1367
GHashTable * state
XML attributes from ticket state.
Definition: tickets.h:32
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition: strings.c:127
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:936
#define CRM_ATTR_IS_DC
Definition: crm.h:117
#define stop_action(rsc, node, optional)
Definition: internal.h:379
Stopped.
Definition: roles.h:29
const char * name
Definition: cib.c:26
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:933
#define XML_ATTR_QUORUM_PANIC
Definition: msg_xml.h:144
#define XML_ATTR_TYPE
Definition: msg_xml.h:160
Whether cluster is symmetric (via symmetric-cluster property)
Definition: scheduler.h:74
#define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY
Definition: msg_xml.h:405
#define XPATH_NODE_STATE
Definition: unpack.c:2826
enum rsc_role_e role
Resource's current role.
Definition: resources.h:468
#define XML_TAG_UTILIZATION
Definition: msg_xml.h:232
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition: utils.c:146
Whether partition has quorum (via have-quorum property)
Definition: scheduler.h:71
#define pcmk__config_warn(fmt...)
Ban resource from current node.
Definition: actions.h:169
GList * children
Resource's child resources, if any.
Definition: resources.h:475
Match only clones and their instances, by either clone or instance ID.
Definition: resources.h:205
#define XML_RULE_ATTR_SCORE
Definition: msg_xml.h:341
enum pcmk__digest_result rc
Definition: internal.h:455
#define XML_BOOLEAN_FALSE
Definition: msg_xml.h:168
gboolean standby
Whether ticket is temporarily suspended.
Definition: tickets.h:31
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition: utils.c:450
int priority_fencing_delay
Priority fencing delay.
Definition: scheduler.h:226
xmlNode * first_named_child(const xmlNode *parent, const char *name)
Definition: xml.c:2484
pcmk_resource_t * pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
Definition: clone.c:245
enum rsc_role_e next_role
Resource&#39;s scheduled next role.
Definition: resources.h:469
Fence resource&#39;s node.
Definition: actions.h:181
bool pcmk_is_probe(const char *task, guint interval)
Definition: actions.c:496
gboolean get_target_role(const pcmk_resource_t *rsc, enum rsc_role_e *role)
Definition: utils.c:411
void copy_in_properties(xmlNode *target, const xmlNode *src)
Definition: xml.c:456
Implementation of pcmk_action_t.
Definition: actions.h:390
int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b, bool same_node_default)
Definition: pe_actions.c:1682
int char2score(const char *score)
Get the integer value of a score string.
Definition: scores.c:36
#define pcmk__config_err(fmt...)
xmlNode * find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find)
Definition: xml.c:384
#define PCMK_ACTION_META_DATA
Definition: actions.h:56
#define pe_proc_warn(fmt...)
Definition: internal.h:50
#define XML_TAG_TRANSIENT_NODEATTRS
Definition: msg_xml.h:420
#define PCMK_ACTION_MONITOR
Definition: actions.h:59
GHashTable * meta
Resource's meta-attributes.
Definition: resources.h:471
Service safely stopped.
Definition: results.h:177
#define set_config_flag(scheduler, option, flag)
Definition: unpack.c:52
#define XML_CIB_TAG_TAG
Definition: msg_xml.h:450
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1265
op_digest_cache_t * rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op, pcmk_node_t *node, pcmk_scheduler_t *scheduler)
Definition: pe_digest.c:389
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:278
Unspecified error.
Definition: results.h:241
Whether unseen nodes should be fenced (via startup-fencing property)
Definition: scheduler.h:116
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:302
#define pe__set_working_set_flags(scheduler, flags_to_set)
Definition: internal.h:52
#define PCMK_ACTION_MIGRATE_TO
Definition: actions.h:58
gboolean pending
Whether controller membership is pending.
Definition: nodes.h:75
#define XML_NVPAIR_ATTR_NAME
Definition: msg_xml.h:393
#define SUB_XPATH_LRM_RSC_OP
Definition: unpack.c:2831
Promoted.
Definition: roles.h:32
#define XML_NODE_IS_MAINTENANCE
Definition: msg_xml.h:290
char * id
XML ID of tag.
Definition: tags.h:27
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: pe_actions.c:1815
#define XML_CIB_TAG_RSC_TEMPLATE
Definition: msg_xml.h:240
Necessary CIB secrets are unavailable.
Definition: results.h:329
Whether concurrent fencing is allowed (via concurrent-fencing property)
Definition: scheduler.h:89
action_fail_response
Possible responses to a resource action failure.
Definition: actions.h:149
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:222
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:314
Service promoted but more likely to fail soon.
Definition: results.h:181
pcmk_action_t * pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *reason, pcmk_scheduler_t *scheduler)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:453
enum crm_ais_msg_types type
Definition: cpg.c:48
#define XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT
Definition: msg_xml.h:406
#define CRMD_JOINSTATE_NACK
Definition: crm.h:161
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:276
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:118
Ensure crm_exit_t can hold this.
Definition: results.h:305
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:110
GHashTable * tickets
Definition: scheduler.h:190
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1588
Action did not complete in time.
Definition: results.h:320
Cluster layer node.
Definition: nodes.h:34
#define XML_OP_ATTR_ON_FAIL
Definition: msg_xml.h:268
pcmk_scheduler_t * data_set
Cluster that node is part of.
Definition: nodes.h:126
pcmk_resource_t * container
Resource containing this one, if any.
Definition: resources.h:480
Demote if promotable, else stop.
Definition: actions.h:197
gboolean remote_was_fenced
Definition: nodes.h:94
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition: nvpair.c:483
Execution failed, do not retry on node.
Definition: results.h:323
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:533
Implementation of pcmk_scheduler_t.
Definition: scheduler.h:172
int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
Definition: complex.c:603
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:211
#define pe_proc_err(fmt...)
Definition: internal.h:49
gboolean remote_requires_reset
Definition: nodes.h:88
Action was cancelled.
Definition: results.h:319
gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition: unpack.c:816
char * dc_uuid
Node ID of designated controller.
Definition: scheduler.h:177
int pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc, time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
Definition: failcounts.c:360
No fence device is configured for target.
Definition: results.h:328
const char * action
Definition: pcmk_fence.c:30
#define pe__set_resource_flags(resource, flags_to_set)
Definition: internal.h:64
GList * resources
Resources in cluster.
Definition: scheduler.h:196
int stonith_timeout
Value of stonith-timeout property.
Definition: scheduler.h:185
#define XML_CIB_TAG_PROPSET
Definition: msg_xml.h:226
#define PCMK__XA_CRMD
Definition: crm_internal.h:84
GList * nodes
Nodes in cluster.
Definition: scheduler.h:195
int pcmk__effective_rc(int rc)
Definition: agents.c:71
bool pcmk_xe_is_probe(const xmlNode *xml_op)
Definition: actions.c:507
#define XML_LRM_ATTR_RSCID
Definition: msg_xml.h:315
gboolean remote_maintenance
Definition: nodes.h:100
Stop resource and leave stopped.
Definition: actions.h:175
#define demote_action(rsc, node, optional)
Definition: internal.h:395
gboolean is_dc
Whether node is cluster&#39;s DC.
Definition: nodes.h:80
#define XML_TAG_ATTR_SETS
Definition: msg_xml.h:227
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:306
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Search for a resource ID in a resource and its children.
Definition: resources.h:287
gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition: actions.c:272
const char * role2text(enum rsc_role_e role)
Definition: common.c:458
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:160
#define PCMK__XA_EXPECTED
Definition: crm_internal.h:85
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
int weight
Node score for a given resource.
Definition: nodes.h:131
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition: strings.c:97
pcmk_resource_t * parent
Resource's parent resource, if any.
Definition: resources.h:413
GList * dangling_migrations
Definition: resources.h:478
#define CRMD_JOINSTATE_DOWN
Definition: crm.h:158
Maximum value for this enum.
Definition: results.h:332
#define crm_warn(fmt, args...)
Definition: logging.h:382
Whether any resource provides or requires unfencing (via CIB resources)
Definition: scheduler.h:86
pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id_rh)
Definition: status.c:391
guint remote_reconnect_ms
Retry interval for remote connections.
Definition: resources.h:427
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT
Definition: msg_xml.h:404
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition: complex.c:1184
const char * crm_exit_str(crm_exit_t exit_code)
Definition: results.c:640
Put resource&#39;s node in standby.
Definition: actions.h:178
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:1887
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:540
Restart resource&#39;s container.
Definition: actions.h:186
Implementation of pcmk_resource_t.
Definition: resources.h:399
#define crm_debug(fmt, args...)
Definition: logging.h:386
#define XML_RSC_ATTR_CONTAINER
Definition: msg_xml.h:255
Utility functions.
Used only to initialize variables.
Definition: results.h:316
Primitive resource.
Definition: resources.h:34
#define XML_ATTR_ID
Definition: msg_xml.h:156
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:447
#define XML_CIB_TAG_RESOURCE
Definition: msg_xml.h:235
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:167
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:222
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1825
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition: utils.c:360
Parameter invalid (in local context)
Definition: results.h:172
gboolean unpacked
Whether node history has been unpacked.
Definition: nodes.h:102
Whether the cluster includes any Pacemaker Remote nodes (via CIB)
Definition: scheduler.h:134
#define PCMK__XA_IN_CCM
Definition: crm_internal.h:88
Parameter invalid (inherently)
Definition: results.h:176
#define pe_warn_once(pe_wo_bit, fmt...)
Definition: internal.h:142
Whether resource is considered failed.
Definition: resources.h:151
#define CRM_ATTR_UNAME
Definition: crm.h:113
bool pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
Definition: actions.c:518
Whether resource must be stopped (instead of demoted) if it is failed.
Definition: resources.h:139
#define crm_trace(fmt, args...)
Definition: logging.h:387
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:160
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:175
void pcmk__g_strcat(GString *buffer,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:1217
bool xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:84
pcmk_node_t * pe_find_node(const GList *node_list, const char *node_name)
Find a node by name in a list of nodes.
Definition: status.c:473
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:99
Insufficient privileges.
Definition: results.h:174
xmlNode * add_node_copy(xmlNode *new_parent, xmlNode *xml_node)
Definition: xml.c:622
const char * stonith_action
Default fencing action.
Definition: scheduler.h:179
struct pe_node_shared_s * details
Basic node information.
Definition: nodes.h:134
bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
Definition: bundle.c:920
#define crm_log_xml_debug(xml, text)
Definition: logging.h:394
#define PCMK_ACTION_START
Definition: actions.h:71
unsigned long long flags
Group of enum pcmk_rsc_flags.
Definition: resources.h:429
const char * uname
Node name in cluster.
Definition: nodes.h:68
Unpromoted.
Definition: roles.h:31
#define XML_TAG_META_SETS
Definition: msg_xml.h:228
void pcmk__str_update(char **str, const char *value)
Definition: strings.c:1193
Wrappers for and extensions to libxml2.
GHashTable * config_hash
Cluster properties.
Definition: scheduler.h:187
rsc_role_e
Definition: roles.h:27
#define XML_ATTR_UNAME
Definition: msg_xml.h:178
char * clone_name
Resource instance ID in history.
Definition: resources.h:401
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:617
#define XML_RSC_ATTR_MANAGED
Definition: msg_xml.h:248
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:638
void pe__unpack_dataset_nvpairs(const xmlNode *xml_obj, const char *set_name, const pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pcmk_scheduler_t *scheduler)
Definition: utils.c:707
time_t lock_time
When shutdown lock started.
Definition: resources.h:487
Action completed, result is known.
Definition: results.h:318
Ticket constraint object.
Definition: tickets.h:27
int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest)
Retrieve the seconds-since-epoch value of an XML attribute.
Definition: nvpair.c:568
#define PCMK_ACTION_STOP
Definition: actions.h:74
GHashTable * pe__node_list2table(const GList *list)
Definition: utils.c:116
#define PCMK_NODE_ATTR_TERMINATE
Definition: nodes.h:29
time_t last_granted
When cluster was last granted the ticket.
Definition: tickets.h:30
#define XML_RSC_ATTR_TARGET_ROLE
Definition: msg_xml.h:249
#define XML_LRM_ATTR_MIGRATE_TARGET
Definition: msg_xml.h:331
Execution failed, do not retry anywhere.
Definition: results.h:324
#define CIB_OPTIONS_FIRST
Definition: msg_xml.h:110
gboolean standby
Whether in standby mode.
Definition: nodes.h:73
#define XML_RSC_ATTR_REMOTE_NODE
Definition: msg_xml.h:258
#define XML_LRM_ATTR_EXIT_REASON
Definition: msg_xml.h:324
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:322
void pe__free_digests(gpointer ptr)
Definition: pe_digest.c:34
gboolean expected_up
Whether expected join state is member.
Definition: nodes.h:79
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:42
Dependencies not available locally.
Definition: results.h:175
Whether resource is blocked from further action.
Definition: resources.h:109
Implementation of pcmk_node_t.
Definition: nodes.h:130
enum pe_obj_types variant
Resource variant.
Definition: resources.h:414
bool pcmk__str_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:957
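A minimal sketch; the candidate list must be NULL-terminated, as the G_GNUC_NULL_TERMINATED attribute indicates, and task is a hypothetical action name:
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) {
    crm_info("%s is a start or stop action", task);
}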
xmlNode * input
CIB XML.
Definition: scheduler.h:175
gboolean granted
Whether cluster has been granted the ticket.
Definition: tickets.h:29
#define XML_CIB_TAG_NODE
Definition: msg_xml.h:223
const char * placement_strategy
Value of placement-strategy property.
Definition: scheduler.h:180
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition: utils.c:682
uint32_t id
Definition: cpg.c:45
gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
Definition: unpack.c:212
Whether resource has pending start action in history.
Definition: resources.h:160
const char * id
Node ID at the cluster layer.
Definition: nodes.h:67
char * id
XML ID of ticket constraint or state.
Definition: tickets.h:28
void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler, gboolean failed)
Definition: native.c:90
pcmk_node_t * pe_find_node_any(const GList *node_list, const char *id, const char *node_name)
Find a node by name or ID in a list of nodes.
Definition: status.c:426
guint shutdown_lock
How long to lock resources (seconds)
Definition: scheduler.h:225
Unspecified error.
Definition: results.h:171
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
Definition: pe_actions.c:1117
GList * refs
XML IDs of objects that reference the tag.
Definition: tags.h:28
const char * target
Definition: pcmk_fence.c:29
GList * fillers
Resources contained by this one, if any.
Definition: resources.h:481
GList * running_rsc
List of resources active on node.
Definition: nodes.h:113
Whether resource has an ignorable failure.
Definition: resources.h:175
#define XML_ATTR_TRANSITION_KEY
Definition: msg_xml.h:416
gboolean rsc_discovery_enabled
Whether probes are allowed on node.
Definition: nodes.h:82
#define CRM_XS
Definition: logging.h:56
Pacemaker Remote node.
Definition: nodes.h:35
bool pe__is_guest_node(const pcmk_node_t *node)
Definition: remote.c:33
Requested action not implemented.
Definition: results.h:173
int crm_str_to_boolean(const char *s, int *ret)
Definition: strings.c:424
Basic node information (all node objects for the same node share this)
Definition: nodes.h:66
const char * localhost
Definition: scheduler.h:216
xmlXPathObjectPtr xpath_search(const xmlNode *xml_top, const char *path)
Definition: xpath.c:139
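A hedged sketch pairing xpath_search() with freeXpathObject() from later in this listing; numXpathResults() is assumed from Pacemaker's XPath helpers:
xmlXPathObjectPtr match = xpath_search(scheduler->input, "//" XML_CIB_TAG_STATUS);

if (numXpathResults(match) > 0) {
    crm_info("Status section present");
}
freeXpathObject(match);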
int pe__target_rc_from_xml(const xmlNode *xml_op)
Definition: unpack.c:4275
xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled)
Definition: pe_actions.c:129
#define PCMK__XA_JOIN
Definition: crm_internal.h:89
Service active but more likely to fail soon.
Definition: results.h:180
gboolean is_remote_node
Whether this is a remote connection.
Definition: resources.h:432
const char * fail2text(enum action_fail_response fail)
Definition: common.c:320
Whether resource requires fencing before recovery if on unclean node.
Definition: resources.h:190
Agent does not implement requested action.
Definition: results.h:321
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:608
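A minimal sketch: a table with duplicated string keys, where both key and value are released with free() when an entry is removed:
GHashTable *attrs = pcmk__strkey_table(free, free);

g_hash_table_insert(attrs, strdup("site"), strdup("datacenter-a"));
g_hash_table_destroy(attrs);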
pcmk__action_result_t result
Definition: pcmk_fence.c:35
pcmk_rsc_methods_t * fns
Resource object methods.
Definition: resources.h:416
gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
Definition: unpack.c:884
guint node_pending_timeout
Pending join times out after this (ms)
Definition: scheduler.h:231
Whether to stop all resources (via stop-all-resources property)
Definition: scheduler.h:104
G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:296
gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
Definition: unpack.c:674
Whether fencing is enabled (via stonith-enabled property)
Definition: scheduler.h:80
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:277
void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler)
Definition: pe_health.c:23
#define crm_err(fmt, args...)
Definition: logging.h:381
pcmk_scheduler_t * scheduler
#define XML_CIB_TAG_TICKET_STATE
Definition: msg_xml.h:447
#define CRM_ASSERT(expr)
Definition: results.h:42
pcmk_node_t * lock_node
Resource shutdown-locked to this node.
Definition: resources.h:485
Success.
Definition: results.h:170
GHashTable * node_hash
Definition: common.h:80
guint crm_parse_interval_spec(const char *input)
Parse milliseconds from a Pacemaker interval specification.
Definition: utils.c:271
char * pcmk__epoch2str(const time_t *source, uint32_t flags)
Definition: iso8601.c:1858
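A hedged sketch; the returned string is assumed to be caller-freed, and a flags value of 0 is assumed to select the default format:
time_t now_s = time(NULL);
char *when = pcmk__epoch2str(&now_s, 0);

crm_info("Current time: %s", when);
free(when);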
Whether resource is allowed to live-migrate.
Definition: resources.h:172
#define OFFLINESTATUS
Definition: util.h:38
#define PCMK_ACTION_MIGRATE_FROM
Definition: actions.h:57
#define XML_ATTR_HAVE_WATCHDOG
Definition: msg_xml.h:146
Started.
Definition: roles.h:30
#define XML_NODE_ATTR_RSC_DISCOVERY
Definition: msg_xml.h:396
Configuration tag object.
Definition: tags.h:26
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:304
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:318
#define XML_NVPAIR_ATTR_VALUE
Definition: msg_xml.h:394
GHashTable * utilization
Node utilization attributes.
Definition: nodes.h:116
gboolean shutdown
Whether shutting down.
Definition: nodes.h:78
char uname[MAX_NAME]
Definition: cpg.c:50
#define pe__clear_resource_flags(resource, flags_to_clear)
Definition: internal.h:70
#define XML_LRM_ATTR_OPSTATUS
Definition: msg_xml.h:316
bool pe__is_remote_node(const pcmk_node_t *node)
Definition: remote.c:25
#define PCMK_ACTION_PROMOTE
Definition: actions.h:65
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:159
GList * running_on
Nodes where resource may be active.
Definition: resources.h:460
CRM_TRACE_INIT_DATA(pe_status)
Agent or dependency not available locally.
Definition: results.h:325
gboolean maintenance
Whether in maintenance mode.
Definition: nodes.h:81
#define XML_LRM_ATTR_RC
Definition: msg_xml.h:317
#define pcmk_ok
Definition: results.h:68
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
Definition: pe_actions.c:1664
GHashTable * digest_cache
Cache of calculated resource digests.
Definition: nodes.h:117
void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2532
GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler)
Definition: unpack.c:5012
gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
Definition: unpack.c:1319
Action is in progress.
Definition: results.h:317
void destroy_ticket(gpointer data)
Definition: utils.c:498
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:204
#define XML_CIB_TAG_OBJ_REF
Definition: msg_xml.h:451
const char * pcmk__readable_interval(guint interval_ms)
Definition: iso8601.c:1926
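A short round-trip sketch with crm_parse_interval_spec() from earlier in this listing; "90s" should parse to 90000 ms, and the formatter's exact output (for example "1m30s") is up to the library:
guint ms = crm_parse_interval_spec("90s");

crm_info("Parsed %u ms, shown as %s", ms, pcmk__readable_interval(ms));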
pcmk_node_t * pending_node
Node on which pending_task is happening.
Definition: resources.h:484
#define SUB_XPATH_LRM_RESOURCE
Definition: unpack.c:2828
enum action_fail_response pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value)
Definition: pe_actions.c:889
gboolean crm_is_true(const char *s)
Definition: strings.c:416
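A hedged sketch reading a cluster property; pe_pref() is assumed from the pengine API, and values such as "true", "yes", "on", and "1" are expected to be accepted:
const char *value = pe_pref(scheduler->config_hash, "stonith-enabled");

if (crm_is_true(value)) {
    crm_info("Fencing is enabled");
}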
#define CRM_ATTR_SITE_NAME
Definition: crm.h:119
Treat resource as unmanaged.
Definition: actions.h:172
Whether resource can be promoted and demoted.
Definition: resources.h:124
Whether cluster has a fencing resource (via CIB resources)
Definition: scheduler.h:83
#define XML_CIB_TAG_GROUP
Definition: msg_xml.h:236
#define PCMK_ACTION_OFF
Definition: actions.h:62
pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler)
Definition: unpack.c:440
Resource role is unknown.
Definition: roles.h:28
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:279
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:37
#define ID(x)
Definition: msg_xml.h:474
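A tiny sketch, assuming ID(x) evaluates to the element's "id" attribute (NULL if unset) and xml_op is a hypothetical history entry:
const char *op_id = ID(xml_op);

if (op_id == NULL) {
    crm_err("Ignoring history entry without an id");
}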
unsigned long long flags
Group of enum pcmk_scheduler_flags.
Definition: scheduler.h:183
#define pe_err(fmt...)
Definition: internal.h:39
const char * parent
Definition: cib.c:27
Action cannot be attempted (e.g. shutdown)
Definition: results.h:327
#define ONLINESTATUS
Definition: util.h:37
enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum action_fail_response on_fail, GHashTable *meta)
Definition: pe_actions.c:1024
gboolean standby_onfail
Whether in standby mode due to on-fail.
Definition: nodes.h:74
#define XML_LRM_ATTR_MIGRATE_SOURCE
Definition: msg_xml.h:330
Whether resource is managed.
Definition: resources.h:106
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition: utils.c:396
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:39
Restart resource.
Definition: actions.h:166
#define CRM_ATTR_ID
Definition: crm.h:114
gboolean unclean
Whether node requires fencing.
Definition: nodes.h:76
unsigned int timeout
Definition: pcmk_fence.c:32
void pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__check_parameters, pcmk_scheduler_t *scheduler)
Definition: remote.c:225
Whether cluster is in maintenance mode (via maintenance-mode property)
Definition: scheduler.h:77
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:191
Whether resource has been removed from the configuration.
Definition: resources.h:103
enum node_type type
Node variant.
Definition: nodes.h:69
#define XML_CIB_TAG_TICKETS
Definition: msg_xml.h:446
crm_time_t * now
Current time for evaluation purposes.
Definition: scheduler.h:176
Execution failed, may be retried.
Definition: results.h:322
#define crm_info(fmt, args...)
Definition: logging.h:384
GHashTable * template_rsc_sets
Mappings of template ID to resource ID.
Definition: scheduler.h:213
GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config)
Definition: pe_actions.c:692
pcmk_node_t * dc_node
Node object for DC.
Definition: scheduler.h:178
gboolean online
Whether online.
Definition: nodes.h:72
#define pe__clear_working_set_flags(scheduler, flags_to_clear)
Definition: internal.h:58
Whether resource is not an anonymous clone instance.
Definition: resources.h:118
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1865
Act as if failure didn't happen.
Definition: actions.h:163
GList * stop_needed
Containers that need stop actions.
Definition: scheduler.h:222
pcmk_resource_t * remote_rsc
Remote connection resource for node, if it is a Pacemaker Remote node.
Definition: nodes.h:111
pcmk_node_t * partial_migration_source
The source node, if migrate_to completed but migrate_from has not.
Definition: resources.h:457
#define PCMK_ACTION_NOTIFY
Definition: actions.h:61
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:35
char * id
Resource ID in configuration.
Definition: resources.h:400
GHashTable * allowed_nodes
Nodes where resource may run (key is node ID, not name)
Definition: resources.h:466
xmlNode * crm_next_same_xml(const xmlNode *sibling)
Get next instance of same XML tag.
Definition: xml.c:2510
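A hedged iteration sketch; first_named_child() is assumed from Pacemaker's XML helpers, and status_xml is a hypothetical parent element:
for (xmlNode *entry = first_named_child(status_xml, XML_CIB_TAG_NODE);
     entry != NULL; entry = crm_next_same_xml(entry)) {
    crm_log_xml_debug(entry, "node entry");
}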