pacemaker  2.0.2-debe490
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
unpack.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2019 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/services.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 
19 #include <crm/common/util.h>
20 #include <crm/pengine/rules.h>
21 #include <crm/pengine/internal.h>
22 #include <unpack.h>
23 #include <pe_status_private.h>
24 
25 CRM_TRACE_INIT_DATA(pe_status);
26 
/* Set or clear a working-set flag from the boolean cluster option named by
 * 'option': if the option is present in the config hash, the flag is set when
 * the value is true and cleared when it is false; an absent option leaves the
 * flag untouched.
 *
 * All macro arguments are now parenthesized at each use to avoid operator-
 * precedence surprises when callers pass expressions; 'option' and 'flag' are
 * each expanded once, 'data_set' twice.
 */
#define set_config_flag(data_set, option, flag) do {                          \
        const char *scf_value = pe_pref((data_set)->config_hash, (option));   \
        if (scf_value) {                                                      \
            if (crm_is_true(scf_value)) {                                     \
                set_bit((data_set)->flags, (flag));                           \
            } else {                                                          \
                clear_bit((data_set)->flags, (flag));                         \
            }                                                                 \
        }                                                                     \
    } while(0)
37 
38 gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
39  enum action_fail_response *failed, pe_working_set_t * data_set);
40 static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
41 
42 // Bitmask for warnings we only want to print once
43 uint32_t pe_wo = 0;
44 
45 static gboolean
46 is_dangling_guest_node(node_t *node)
47 {
48  /* we are looking for a remote-node that was supposed to be mapped to a
49  * container resource, but all traces of that container have disappeared
50  * from both the config and the status section. */
51  if (pe__is_guest_or_remote_node(node) &&
52  node->details->remote_rsc &&
53  node->details->remote_rsc->container == NULL &&
55  return TRUE;
56  }
57 
58  return FALSE;
59 }
60 
61 
69 void
70 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
71 {
72  CRM_CHECK(node, return);
73 
74  /* A guest node is fenced by marking its container as failed */
75  if (pe__is_guest_node(node)) {
76  resource_t *rsc = node->details->remote_rsc->container;
77 
78  if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
79  if (!is_set(rsc->flags, pe_rsc_managed)) {
80  crm_notice("Not fencing guest node %s "
81  "(otherwise would because %s): "
82  "its guest resource %s is unmanaged",
83  node->details->uname, reason, rsc->id);
84  } else {
85  crm_warn("Guest node %s will be fenced "
86  "(by recovering its guest resource %s): %s",
87  node->details->uname, rsc->id, reason);
88 
89  /* We don't mark the node as unclean because that would prevent the
90  * node from running resources. We want to allow it to run resources
91  * in this transition if the recovery succeeds.
92  */
93  node->details->remote_requires_reset = TRUE;
95  }
96  }
97 
98  } else if (is_dangling_guest_node(node)) {
99  crm_info("Cleaning up dangling connection for guest node %s: "
100  "fencing was already done because %s, "
101  "and guest resource no longer exists",
102  node->details->uname, reason);
104 
105  } else if (pe__is_remote_node(node)) {
106  resource_t *rsc = node->details->remote_rsc;
107 
108  if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
109  crm_notice("Not fencing remote node %s "
110  "(otherwise would because %s): connection is unmanaged",
111  node->details->uname, reason);
112  } else if(node->details->remote_requires_reset == FALSE) {
113  node->details->remote_requires_reset = TRUE;
114  crm_warn("Remote node %s %s: %s",
115  node->details->uname,
116  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
117  reason);
118  }
119  node->details->unclean = TRUE;
120  pe_fence_op(node, NULL, TRUE, reason, data_set);
121 
122  } else if (node->details->unclean) {
123  crm_trace("Cluster node %s %s because %s",
124  node->details->uname,
125  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
126  reason);
127 
128  } else {
129  crm_warn("Cluster node %s %s: %s",
130  node->details->uname,
131  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
132  reason);
133  node->details->unclean = TRUE;
134  pe_fence_op(node, NULL, TRUE, reason, data_set);
135  }
136 }
137 
138 // @TODO xpaths can't handle templates, rules, or id-refs
139 
140 // nvpair with provides or requires set to unfencing
141 #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
142  "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
143  "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
144  "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
145 
146 // unfencing in rsc_defaults or any resource
147 #define XPATH_ENABLE_UNFENCING \
148  "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
149  "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
150  "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
151  "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
152 
153 static
154 void set_if_xpath(unsigned long long flag, const char *xpath,
155  pe_working_set_t *data_set)
156 {
157  xmlXPathObjectPtr result = NULL;
158 
159  if (is_not_set(data_set->flags, flag)) {
160  result = xpath_search(data_set->input, xpath);
161  if (result && (numXpathResults(result) > 0)) {
162  set_bit(data_set->flags, flag);
163  }
164  freeXpathObject(result);
165  }
166 }
167 
168 gboolean
169 unpack_config(xmlNode * config, pe_working_set_t * data_set)
170 {
171  const char *value = NULL;
172  GHashTable *config_hash = crm_str_table_new();
173 
174  data_set->config_hash = config_hash;
175 
176  unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
177  CIB_OPTIONS_FIRST, FALSE, data_set->now);
178 
179  verify_pe_options(data_set->config_hash);
180 
181  set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
182  if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
183  crm_info("Startup probes: disabled (dangerous)");
184  }
185 
186  value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
187  if (value && crm_is_true(value)) {
188  crm_notice("Watchdog will be used via SBD if fencing is required");
190  }
191 
192  /* Set certain flags via xpath here, so they can be used before the relevant
193  * configuration sections are unpacked.
194  */
195  set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
196 
197  value = pe_pref(data_set->config_hash, "stonith-timeout");
198  data_set->stonith_timeout = crm_get_msec(value);
199  crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
200 
201  set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
202  crm_debug("STONITH of failed nodes is %s",
203  is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
204 
205  data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
206  if (!strcmp(data_set->stonith_action, "poweroff")) {
208  "Support for stonith-action of 'poweroff' is deprecated "
209  "and will be removed in a future release (use 'off' instead)");
210  data_set->stonith_action = "off";
211  }
212  crm_trace("STONITH will %s nodes", data_set->stonith_action);
213 
214  set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
215  crm_debug("Concurrent fencing is %s",
216  is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
217 
218  set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
219  crm_debug("Stop all active resources: %s",
220  is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
221 
222  set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
223  if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
224  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
225  }
226 
227  value = pe_pref(data_set->config_hash, "no-quorum-policy");
228 
229  if (safe_str_eq(value, "ignore")) {
231 
232  } else if (safe_str_eq(value, "freeze")) {
234 
235  } else if (safe_str_eq(value, "suicide")) {
236  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
237  int do_panic = 0;
238 
240  &do_panic);
241  if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
243  } else {
244  crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
245  data_set->no_quorum_policy = no_quorum_stop;
246  }
247  } else {
248  crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
249  data_set->no_quorum_policy = no_quorum_stop;
250  }
251 
252  } else {
253  data_set->no_quorum_policy = no_quorum_stop;
254  }
255 
256  switch (data_set->no_quorum_policy) {
257  case no_quorum_freeze:
258  crm_debug("On loss of quorum: Freeze resources");
259  break;
260  case no_quorum_stop:
261  crm_debug("On loss of quorum: Stop ALL resources");
262  break;
263  case no_quorum_suicide:
264  crm_notice("On loss of quorum: Fence all remaining nodes");
265  break;
266  case no_quorum_ignore:
267  crm_notice("On loss of quorum: Ignore");
268  break;
269  }
270 
271  set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
272  crm_trace("Orphan resources are %s",
273  is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
274 
275  set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
276  crm_trace("Orphan resource actions are %s",
277  is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
278 
279  set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
280  crm_trace("Stopped resources are removed from the status section: %s",
281  is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
282 
283  set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
284  crm_trace("Maintenance mode: %s",
285  is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
286 
287  set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
288  crm_trace("Start failures are %s",
289  is_set(data_set->flags,
290  pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
291 
292  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
293  set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
294  }
295  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
296  crm_trace("Unseen nodes will be fenced");
297  } else {
298  pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
299  }
300 
301  node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
302  node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
303  node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
304 
305  crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
306  pe_pref(data_set->config_hash, "node-health-red"),
307  pe_pref(data_set->config_hash, "node-health-yellow"),
308  pe_pref(data_set->config_hash, "node-health-green"));
309 
310  data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
311  crm_trace("Placement strategy: %s", data_set->placement_strategy);
312 
313  return TRUE;
314 }
315 
316 static void
317 destroy_digest_cache(gpointer ptr)
318 {
319  op_digest_cache_t *data = ptr;
320 
321  free_xml(data->params_all);
322  free_xml(data->params_secure);
323  free_xml(data->params_restart);
324 
325  free(data->digest_all_calc);
326  free(data->digest_restart_calc);
327  free(data->digest_secure_calc);
328 
329  free(data);
330 }
331 
332 node_t *
333 pe_create_node(const char *id, const char *uname, const char *type,
334  const char *score, pe_working_set_t * data_set)
335 {
336  node_t *new_node = NULL;
337 
338  if (pe_find_node(data_set->nodes, uname) != NULL) {
339  crm_config_warn("Detected multiple node entries with uname=%s"
340  " - this is rarely intended", uname);
341  }
342 
343  new_node = calloc(1, sizeof(node_t));
344  if (new_node == NULL) {
345  return NULL;
346  }
347 
348  new_node->weight = char2score(score);
349  new_node->fixed = FALSE;
350  new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
351 
352  if (new_node->details == NULL) {
353  free(new_node);
354  return NULL;
355  }
356 
357  crm_trace("Creating node for entry %s/%s", uname, id);
358  new_node->details->id = id;
359  new_node->details->uname = uname;
360  new_node->details->online = FALSE;
361  new_node->details->shutdown = FALSE;
362  new_node->details->rsc_discovery_enabled = TRUE;
363  new_node->details->running_rsc = NULL;
364  new_node->details->type = node_ping;
365 
366  if (safe_str_eq(type, "remote")) {
367  new_node->details->type = node_remote;
369  } else if ((type == NULL) || safe_str_eq(type, "member")) {
370  new_node->details->type = node_member;
371  }
372 
373  new_node->details->attrs = crm_str_table_new();
374 
375  if (pe__is_guest_or_remote_node(new_node)) {
376  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
377  strdup("remote"));
378  } else {
379  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
380  strdup("cluster"));
381  }
382 
383  new_node->details->utilization = crm_str_table_new();
384 
385  new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
386  g_str_equal, free,
387  destroy_digest_cache);
388 
389  data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
390  return new_node;
391 }
392 
393 bool
394 remote_id_conflict(const char *remote_name, pe_working_set_t *data)
395 {
396  bool match = FALSE;
397 #if 1
398  pe_find_resource(data->resources, remote_name);
399 #else
400  if (data->name_check == NULL) {
401  data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
402  for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) {
403  const char *id = ID(xml_rsc);
404 
405  /* avoiding heap allocation here because we know the duration of this hashtable allows us to */
406  g_hash_table_insert(data->name_check, (char *) id, (char *) id);
407  }
408  }
409  if (g_hash_table_lookup(data->name_check, remote_name)) {
410  match = TRUE;
411  }
412 #endif
413  if (match) {
414  crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
415  return NULL;
416  }
417 
418  return match;
419 }
420 
421 
422 static const char *
423 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
424 {
425  xmlNode *attr_set = NULL;
426  xmlNode *attr = NULL;
427 
428  const char *container_id = ID(xml_obj);
429  const char *remote_name = NULL;
430  const char *remote_server = NULL;
431  const char *remote_port = NULL;
432  const char *connect_timeout = "60s";
433  const char *remote_allow_migrate=NULL;
434  const char *is_managed = NULL;
435 
436  for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) {
437  if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
438  continue;
439  }
440 
441  for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) {
442  const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
443  const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
444 
446  remote_name = value;
447  } else if (safe_str_eq(name, "remote-addr")) {
448  remote_server = value;
449  } else if (safe_str_eq(name, "remote-port")) {
450  remote_port = value;
451  } else if (safe_str_eq(name, "remote-connect-timeout")) {
452  connect_timeout = value;
453  } else if (safe_str_eq(name, "remote-allow-migrate")) {
454  remote_allow_migrate=value;
455  } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
456  is_managed = value;
457  }
458  }
459  }
460 
461  if (remote_name == NULL) {
462  return NULL;
463  }
464 
465  if (remote_id_conflict(remote_name, data)) {
466  return NULL;
467  }
468 
469  pe_create_remote_xml(parent, remote_name, container_id,
470  remote_allow_migrate, is_managed,
471  connect_timeout, remote_server, remote_port);
472  return remote_name;
473 }
474 
475 static void
476 handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
477 {
478  if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
479  /* Ignore fencing for remote nodes that don't have a connection resource
480  * associated with them. This happens when remote node entries get left
481  * in the nodes section after the connection resource is removed.
482  */
483  return;
484  }
485 
486  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
487  // All nodes are unclean until we've seen their status entry
488  new_node->details->unclean = TRUE;
489 
490  } else {
491  // Blind faith ...
492  new_node->details->unclean = FALSE;
493  }
494 
495  /* We need to be able to determine if a node's status section
496  * exists or not separate from whether the node is unclean. */
497  new_node->details->unseen = TRUE;
498 }
499 
500 gboolean
501 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
502 {
503  xmlNode *xml_obj = NULL;
504  node_t *new_node = NULL;
505  const char *id = NULL;
506  const char *uname = NULL;
507  const char *type = NULL;
508  const char *score = NULL;
509 
510  for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
511  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
512  new_node = NULL;
513 
514  id = crm_element_value(xml_obj, XML_ATTR_ID);
515  uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
516  type = crm_element_value(xml_obj, XML_ATTR_TYPE);
517  score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
518  crm_trace("Processing node %s/%s", uname, id);
519 
520  if (id == NULL) {
521  crm_config_err("Must specify id tag in <node>");
522  continue;
523  }
524  new_node = pe_create_node(id, uname, type, score, data_set);
525 
526  if (new_node == NULL) {
527  return FALSE;
528  }
529 
530 /* if(data_set->have_quorum == FALSE */
531 /* && data_set->no_quorum_policy == no_quorum_stop) { */
532 /* /\* start shutting resources down *\/ */
533 /* new_node->weight = -INFINITY; */
534 /* } */
535 
536  handle_startup_fencing(data_set, new_node);
537 
538  add_node_attrs(xml_obj, new_node, FALSE, data_set);
539  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
540  new_node->details->utilization, NULL, FALSE, data_set->now);
541 
542  crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
543  }
544  }
545 
546  if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
547  crm_info("Creating a fake local node");
548  pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
549  data_set);
550  }
551 
552  return TRUE;
553 }
554 
555 static void
556 setup_container(resource_t * rsc, pe_working_set_t * data_set)
557 {
558  const char *container_id = NULL;
559 
560  if (rsc->children) {
561  GListPtr gIter = rsc->children;
562 
563  for (; gIter != NULL; gIter = gIter->next) {
564  resource_t *child_rsc = (resource_t *) gIter->data;
565 
566  setup_container(child_rsc, data_set);
567  }
568  return;
569  }
570 
571  container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
572  if (container_id && safe_str_neq(container_id, rsc->id)) {
573  resource_t *container = pe_find_resource(data_set->resources, container_id);
574 
575  if (container) {
576  rsc->container = container;
577  set_bit(container->flags, pe_rsc_is_container);
578  container->fillers = g_list_append(container->fillers, rsc);
579  pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
580  } else {
581  pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
582  }
583  }
584 }
585 
586 gboolean
587 unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
588 {
589  xmlNode *xml_obj = NULL;
590 
591  /* Create remote nodes and guest nodes from the resource configuration
592  * before unpacking resources.
593  */
594  for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
595  const char *new_node_id = NULL;
596 
597  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
598  * primitives.
599  */
600  if (xml_contains_remote_node(xml_obj)) {
601  new_node_id = ID(xml_obj);
602  /* The "pe_find_node" check is here to make sure we don't iterate over
603  * an expanded node that has already been added to the node list. */
604  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
605  crm_trace("Found remote node %s defined by resource %s",
606  new_node_id, ID(xml_obj));
607  pe_create_node(new_node_id, new_node_id, "remote", NULL,
608  data_set);
609  }
610  continue;
611  }
612 
613  /* Check for guest nodes, which are defined by special meta-attributes
614  * of a primitive of any type (for example, VirtualDomain or Xen).
615  */
616  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
617  /* This will add an ocf:pacemaker:remote primitive to the
618  * configuration for the guest node's connection, to be unpacked
619  * later.
620  */
621  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
622  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
623  crm_trace("Found guest node %s in resource %s",
624  new_node_id, ID(xml_obj));
625  pe_create_node(new_node_id, new_node_id, "remote", NULL,
626  data_set);
627  }
628  continue;
629  }
630 
631  /* Check for guest nodes inside a group. Clones are currently not
632  * supported as guest nodes.
633  */
634  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
635  xmlNode *xml_obj2 = NULL;
636  for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) {
637 
638  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
639 
640  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
641  crm_trace("Found guest node %s in resource %s inside group %s",
642  new_node_id, ID(xml_obj2), ID(xml_obj));
643  pe_create_node(new_node_id, new_node_id, "remote", NULL,
644  data_set);
645  }
646  }
647  }
648  }
649  return TRUE;
650 }
651 
652 /* Call this after all the nodes and resources have been
653  * unpacked, but before the status section is read.
654  *
655  * A remote node's online status is reflected by the state
656  * of the remote node's connection resource. We need to link
657  * the remote node to this connection resource so we can have
658  * easy access to the connection resource during the PE calculations.
659  */
660 static void
661 link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
662 {
663  node_t *remote_node = NULL;
664 
665  if (new_rsc->is_remote_node == FALSE) {
666  return;
667  }
668 
669  if (is_set(data_set->flags, pe_flag_quick_location)) {
670  /* remote_nodes and remote_resources are not linked in quick location calculations */
671  return;
672  }
673 
674  print_resource(LOG_TRACE, "Linking remote-node connection resource, ", new_rsc, FALSE);
675 
676  remote_node = pe_find_node(data_set->nodes, new_rsc->id);
677  CRM_CHECK(remote_node != NULL, return;);
678 
679  remote_node->details->remote_rsc = new_rsc;
680 
681  if (new_rsc->container == NULL) {
682  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
683  * the same as is done for cluster nodes.
684  */
685  handle_startup_fencing(data_set, remote_node);
686 
687  } else {
688  /* pe_create_node() marks the new node as "remote" or "cluster"; now
689  * that we know the node is a guest node, update it correctly.
690  */
691  g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
692  strdup("container"));
693  }
694 }
695 
696 static void
697 destroy_tag(gpointer data)
698 {
699  tag_t *tag = data;
700 
701  if (tag) {
702  free(tag->id);
703  g_list_free_full(tag->refs, free);
704  free(tag);
705  }
706 }
707 
720 gboolean
721 unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
722 {
723  xmlNode *xml_obj = NULL;
724  GListPtr gIter = NULL;
725 
726  data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
727  g_str_equal, free,
728  destroy_tag);
729 
730  for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
731  resource_t *new_rsc = NULL;
732 
733  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
734  const char *template_id = ID(xml_obj);
735 
736  if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
737  template_id, NULL, NULL) == FALSE) {
738  /* Record the template's ID for the knowledge of its existence anyway. */
739  g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
740  }
741  continue;
742  }
743 
744  crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
745  if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
746  data_set->resources = g_list_append(data_set->resources, new_rsc);
747  print_resource(LOG_TRACE, "Added ", new_rsc, FALSE);
748 
749  } else {
750  crm_config_err("Failed unpacking %s %s",
751  crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
752  if (new_rsc != NULL && new_rsc->fns != NULL) {
753  new_rsc->fns->free(new_rsc);
754  }
755  }
756  }
757 
758  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
759  resource_t *rsc = (resource_t *) gIter->data;
760 
761  setup_container(rsc, data_set);
762  link_rsc2remotenode(data_set, rsc);
763  }
764 
765  data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
766  if (is_set(data_set->flags, pe_flag_quick_location)) {
767  /* Ignore */
768 
769  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
770  && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
771 
772  crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
773  crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
774  crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
775  }
776 
777  return TRUE;
778 }
779 
780 gboolean
781 unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
782 {
783  xmlNode *xml_tag = NULL;
784 
785  data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
786  destroy_tag);
787 
788  for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) {
789  xmlNode *xml_obj_ref = NULL;
790  const char *tag_id = ID(xml_tag);
791 
792  if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
793  continue;
794  }
795 
796  if (tag_id == NULL) {
797  crm_config_err("Failed unpacking %s: %s should be specified",
798  crm_element_name(xml_tag), XML_ATTR_ID);
799  continue;
800  }
801 
802  for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) {
803  const char *obj_ref = ID(xml_obj_ref);
804 
805  if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
806  continue;
807  }
808 
809  if (obj_ref == NULL) {
810  crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
811  crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
812  continue;
813  }
814 
815  if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
816  return FALSE;
817  }
818  }
819  }
820 
821  return TRUE;
822 }
823 
824 /* The ticket state section:
825  * "/cib/status/tickets/ticket_state" */
826 static gboolean
827 unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
828 {
829  const char *ticket_id = NULL;
830  const char *granted = NULL;
831  const char *last_granted = NULL;
832  const char *standby = NULL;
833  xmlAttrPtr xIter = NULL;
834 
835  ticket_t *ticket = NULL;
836 
837  ticket_id = ID(xml_ticket);
838  if (ticket_id == NULL || strlen(ticket_id) == 0) {
839  return FALSE;
840  }
841 
842  crm_trace("Processing ticket state for %s", ticket_id);
843 
844  ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
845  if (ticket == NULL) {
846  ticket = ticket_new(ticket_id, data_set);
847  if (ticket == NULL) {
848  return FALSE;
849  }
850  }
851 
852  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
853  const char *prop_name = (const char *)xIter->name;
854  const char *prop_value = crm_element_value(xml_ticket, prop_name);
855 
856  if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
857  continue;
858  }
859  g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
860  }
861 
862  granted = g_hash_table_lookup(ticket->state, "granted");
863  if (granted && crm_is_true(granted)) {
864  ticket->granted = TRUE;
865  crm_info("We have ticket '%s'", ticket->id);
866  } else {
867  ticket->granted = FALSE;
868  crm_info("We do not have ticket '%s'", ticket->id);
869  }
870 
871  last_granted = g_hash_table_lookup(ticket->state, "last-granted");
872  if (last_granted) {
873  ticket->last_granted = crm_parse_int(last_granted, 0);
874  }
875 
876  standby = g_hash_table_lookup(ticket->state, "standby");
877  if (standby && crm_is_true(standby)) {
878  ticket->standby = TRUE;
879  if (ticket->granted) {
880  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
881  }
882  } else {
883  ticket->standby = FALSE;
884  }
885 
886  crm_trace("Done with ticket state for %s", ticket_id);
887 
888  return TRUE;
889 }
890 
891 static gboolean
892 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
893 {
894  xmlNode *xml_obj = NULL;
895 
896  for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
897  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
898  continue;
899  }
900  unpack_ticket_state(xml_obj, data_set);
901  }
902 
903  return TRUE;
904 }
905 
/* Unpack the transient attributes of a <node_state> entry for a Pacemaker
 * Remote (remote or guest) node, updating the node's shutdown, standby,
 * maintenance, and resource-discovery settings.
 */
static void
unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
{
    const char *resource_discovery_enabled = NULL;
    xmlNode *attrs = NULL;
    resource_t *rsc = NULL;
    const char *shutdown = NULL;

    /* Only <node_state> elements are of interest */
    if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
        return;
    }

    /* Cluster nodes are handled elsewhere; this applies to remote/guest nodes only */
    if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
        return;
    }
    crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);

    /* NOTE(review): the right-hand side of this assignment was lost in
     * extraction -- verify against the upstream source. */
    this_node->details->remote_maintenance =

    rsc = this_node->details->remote_rsc;
    /* A node pending a reset stays unclean/unseen until fencing completes */
    if (this_node->details->remote_requires_reset == FALSE) {
        this_node->details->unclean = FALSE;
        this_node->details->unseen = FALSE;
    }
    attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
    add_node_attrs(attrs, this_node, TRUE, data_set);

    /* Any nonzero shutdown attribute means a shutdown was requested */
    shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
    if (shutdown != NULL && safe_str_neq("0", shutdown)) {
        crm_info("Node %s is shutting down", this_node->details->uname);
        this_node->details->shutdown = TRUE;
        if (rsc) {
            /* NOTE(review): the body of this branch was lost in extraction --
             * verify against the upstream source. */
        }
    }

    if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
        crm_info("Node %s is in standby-mode", this_node->details->uname);
        this_node->details->standby = TRUE;
    }

    /* An unmanaged connection resource also implies maintenance mode */
    if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
        (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
        crm_info("Node %s is in maintenance-mode", this_node->details->uname);
        this_node->details->maintenance = TRUE;
    }

    resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
    if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
        if (pe__is_remote_node(this_node)
            && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
            /* NOTE(review): the argument line of this crm_warn() call was lost
             * in extraction -- verify against the upstream source. */
            crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled",
        } else {
            /* This is either a remote node with fencing enabled, or a guest
             * node. We don't care whether fencing is enabled when fencing guest
             * nodes, because they are "fenced" by recovering their containing
             * resource.
             */
            crm_info("Node %s has resource discovery disabled", this_node->details->uname);
            this_node->details->rsc_discovery_enabled = FALSE;
        }
    }
}
971 
/* Unpack resource history (lrm entries) from each <node_state> in the status
 * section. Returns true if at least one node was newly processed, so callers
 * can iterate to a fixed point: remote/guest nodes can only be processed after
 * their connection (and, for guests, container) resource state is known, which
 * may in turn only become known after other nodes' histories are unpacked.
 *
 * \param status   status section XML
 * \param fence    if true, also process nodes that are down or unseen
 * \param data_set cluster working set
 */
static bool
unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
{
    bool changed = false;
    xmlNode *lrm_rsc = NULL;

    for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
        const char *id = NULL;
        const char *uname = NULL;
        node_t *this_node = NULL;
        bool process = FALSE;

        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
            continue;
        }

        id = crm_element_value(state, XML_ATTR_ID);
        uname = crm_element_value(state, XML_ATTR_UNAME);
        this_node = pe_find_node_any(data_set->nodes, id, uname);

        if (this_node == NULL) {
            crm_info("Node %s is unknown", id);
            continue;

        } else if (this_node->details->unpacked) {
            /* Each node's history is unpacked at most once across iterations */
            crm_info("Node %s is already processed", id);
            continue;

        } else if (!pe__is_guest_or_remote_node(this_node)
                   && is_set(data_set->flags, pe_flag_stonith_enabled)) {
            // A redundant test, but preserves the order for regression tests
            process = TRUE;

        } else if (pe__is_guest_or_remote_node(this_node)) {
            bool check = FALSE;
            resource_t *rsc = this_node->details->remote_rsc;

            if(fence) {
                check = TRUE;

            } else if(rsc == NULL) {
                /* Not ready yet */

            } else if (pe__is_guest_node(this_node)
                       && rsc->role == RSC_ROLE_STARTED
                       && rsc->container->role == RSC_ROLE_STARTED) {
                /* Both the connection and its containing resource need to be
                 * known to be up before we process resources running in it.
                 */
                check = TRUE;
                crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);

            } else if (!pe__is_guest_node(this_node)
                       && rsc->role == RSC_ROLE_STARTED) {
                /* Remote node: connection must be started before processing */
                check = TRUE;
                crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
            }

            if (check) {
                /* Connection state is known; establish the node's online state
                 * and attributes before unpacking its resource history
                 */
                determine_remote_online_status(data_set, this_node);
                unpack_handle_remote_attrs(this_node, state, data_set);
                process = TRUE;
            }

        } else if (this_node->details->online) {
            process = TRUE;

        } else if (fence) {
            process = TRUE;
        }

        if(process) {
            crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
                      fence?"un":"",
                      (pe__is_guest_or_remote_node(this_node)? " remote" : ""),
                      this_node->details->uname);
            changed = TRUE;
            this_node->details->unpacked = TRUE;

            lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
            lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
            unpack_lrm_resources(this_node, lrm_rsc, data_set);
        }
    }
    return changed;
}
1058 
/* Unpack the CIB status section: ticket state, cluster node states (online,
 * standby, maintenance), then resource histories via unpack_node_loop(), and
 * finally the online status of any remote/guest nodes not yet processed.
 */
/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
    const char *id = NULL;
    const char *uname = NULL;

    xmlNode *state = NULL;
    node_t *this_node = NULL;

    crm_trace("Beginning unpack");

    if (data_set->tickets == NULL) {
        data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                                  free, destroy_ticket);
    }

    for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
            unpack_tickets_state((xmlNode *) state, data_set);

        } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
            xmlNode *attrs = NULL;
            const char *resource_discovery_enabled = NULL;

            id = crm_element_value(state, XML_ATTR_ID);
            uname = crm_element_value(state, XML_ATTR_UNAME);
            this_node = pe_find_node_any(data_set->nodes, id, uname);

            if (uname == NULL) {
                /* error */
                continue;

            } else if (this_node == NULL) {
                crm_config_warn("Node %s in status section no longer exists", uname);
                continue;

            } else if (pe__is_guest_or_remote_node(this_node)) {
                /* online state for remote nodes is determined by the
                 * rsc state after all the unpacking is done. we do however
                 * need to mark whether or not the node has been fenced as this plays
                 * a role during unpacking cluster node resource state */
                /* NOTE(review): the right-hand side of this assignment was
                 * lost in extraction -- verify against the upstream source. */
                this_node->details->remote_was_fenced =
                continue;
            }

            crm_trace("Processing node id=%s, uname=%s", id, uname);

            /* Mark the node as provisionally clean
             * - at least we have seen it in the current cluster's lifetime
             */
            this_node->details->unclean = FALSE;
            this_node->details->unseen = FALSE;
            attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
            add_node_attrs(attrs, this_node, TRUE, data_set);

            if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
                crm_info("Node %s is in standby-mode", this_node->details->uname);
                this_node->details->standby = TRUE;
            }

            if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
                crm_info("Node %s is in maintenance-mode", this_node->details->uname);
                this_node->details->maintenance = TRUE;
            }

            resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
            if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
                /* NOTE(review): the argument line of this crm_warn() call was
                 * lost in extraction -- verify against the upstream source. */
                crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
            }

            crm_trace("determining node state");
            determine_online_status(state, this_node, data_set);

            if (is_not_set(data_set->flags, pe_flag_have_quorum)
                && this_node->details->online
                && (data_set->no_quorum_policy == no_quorum_suicide)) {
                /* Everything else should flow from this automatically
                 * At least until the PE becomes able to migrate off healthy resources
                 */
                pe_fence_node(data_set, this_node, "cluster does not have quorum");
            }
        }
    }


    /* Iterate to a fixed point: each pass may make more remote/guest node
     * histories processable (see unpack_node_loop)
     */
    while(unpack_node_loop(status, FALSE, data_set)) {
        crm_trace("Start another loop");
    }

    // Now catch any nodes we didn't see
    unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);

    /* Now that we know where resources are, we can schedule stops of containers
     * with failed bundle connections
     */
    if (data_set->stop_needed != NULL) {
        for (GList *item = data_set->stop_needed; item; item = item->next) {
            pe_resource_t *container = item->data;
            pe_node_t *node = pe__current_node(container);

            if (node) {
                stop_action(container, node, FALSE);
            }
        }
        g_list_free(data_set->stop_needed);
        data_set->stop_needed = NULL;
    }

    /* Determine online status for any remote/guest nodes whose history was
     * never unpacked (e.g. no resources ever ran there)
     */
    for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        node_t *this_node = gIter->data;

        if (this_node == NULL) {
            continue;
        } else if (!pe__is_guest_or_remote_node(this_node)) {
            continue;
        } else if(this_node->details->unpacked) {
            continue;
        }
        determine_remote_online_status(data_set, this_node);
    }

    return TRUE;
}
1187 
1188 static gboolean
1189 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1190  node_t * this_node)
1191 {
1192  gboolean online = FALSE;
1193  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1194  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1195  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1196  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1197 
1198  if (!crm_is_true(in_cluster)) {
1199  crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
1200 
1201  } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
1202  if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1203  online = TRUE;
1204  } else {
1205  crm_debug("Node is not ready to run resources: %s", join);
1206  }
1207 
1208  } else if (this_node->details->expected_up == FALSE) {
1209  crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
1210  crm_trace("\tis_peer=%s, join=%s, expected=%s",
1211  crm_str(is_peer), crm_str(join), crm_str(exp_state));
1212 
1213  } else {
1214  /* mark it unclean */
1215  pe_fence_node(data_set, this_node, "peer is unexpectedly down");
1216  crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
1217  crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
1218  }
1219  return online;
1220 }
1221 
/* Decide whether a cluster node is online when fencing is enabled.
 * With fencing available, inconsistent peer state can be resolved by
 * scheduling fencing, so the branch order below (earliest match wins)
 * encodes the recovery policy. Returns TRUE if the node should be
 * treated as online.
 */
static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                node_t * this_node)
{
    gboolean online = FALSE;
    gboolean do_terminate = FALSE;
    bool crmd_online = FALSE;
    const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
    const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
    const char *terminate = pe_node_attribute_raw(this_node, "terminate");

/*
  - XML_NODE_IN_CLUSTER    ::= true|false
  - XML_NODE_IS_PEER       ::= online|offline
  - XML_NODE_JOIN_STATE    ::= member|down|pending|banned
  - XML_NODE_EXPECTED      ::= member|down
*/

    if (crm_is_true(terminate)) {
        do_terminate = TRUE;

    } else if (terminate != NULL && strlen(terminate) > 0) {
        /* could be a time() value */
        char t = terminate[0];

        if (t != '0' && isdigit(t)) {
            do_terminate = TRUE;
        }
    }

    crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
              this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
              crm_str(join), crm_str(exp_state), do_terminate);

    online = crm_is_true(in_cluster);
    crmd_online = safe_str_eq(is_peer, ONLINESTATUS);
    if (exp_state == NULL) {
        /* No expectation recorded; treat as expected down */
        exp_state = CRMD_JOINSTATE_DOWN;
    }

    if (this_node->details->shutdown) {
        crm_debug("%s is shutting down", this_node->details->uname);

        /* Slightly different criteria since we can't shut down a dead peer */
        online = crmd_online;

    } else if (in_cluster == NULL) {
        pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");

    } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
        pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");

    } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {

        if (crm_is_true(in_cluster) || crmd_online) {
            /* Partially up but not expected to be a member yet: hold it back */
            crm_info("- Node %s is not ready to run resources", this_node->details->uname);
            this_node->details->standby = TRUE;
            this_node->details->pending = TRUE;

        } else {
            crm_trace("%s is down or still coming up", this_node->details->uname);
        }

    } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
               && crm_is_true(in_cluster) == FALSE && !crmd_online) {
        /* Requested termination already happened */
        crm_info("Node %s was just shot", this_node->details->uname);
        online = FALSE;

    } else if (crm_is_true(in_cluster) == FALSE) {
        pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");

    } else if (!crmd_online) {
        pe_fence_node(data_set, this_node, "peer process is no longer available");

        /* Everything is running at this point, now check join state */
    } else if (do_terminate) {
        pe_fence_node(data_set, this_node, "termination was requested");

    } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
        crm_info("Node %s is active", this_node->details->uname);

    } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
               || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
        crm_info("Node %s is not ready to run resources", this_node->details->uname);
        this_node->details->standby = TRUE;
        this_node->details->pending = TRUE;

    } else {
        /* Unrecognized combination of states: fence to be safe */
        pe_fence_node(data_set, this_node, "peer was in an unknown state");
        crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
                 this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
                 crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
    }

    return online;
}
1320 
1321 static gboolean
1322 determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
1323 {
1324  resource_t *rsc = this_node->details->remote_rsc;
1325  resource_t *container = NULL;
1326  pe_node_t *host = NULL;
1327 
1328  /* If there is a node state entry for a (former) Pacemaker Remote node
1329  * but no resource creating that node, the node's connection resource will
1330  * be NULL. Consider it an offline remote node in that case.
1331  */
1332  if (rsc == NULL) {
1333  this_node->details->online = FALSE;
1334  goto remote_online_done;
1335  }
1336 
1337  container = rsc->container;
1338 
1339  if (container && (g_list_length(rsc->running_on) == 1)) {
1340  host = rsc->running_on->data;
1341  }
1342 
1343  /* If the resource is currently started, mark it online. */
1344  if (rsc->role == RSC_ROLE_STARTED) {
1345  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1346  (container? "Guest" : "Remote"), this_node->details->id);
1347  this_node->details->online = TRUE;
1348  }
1349 
1350  /* consider this node shutting down if transitioning start->stop */
1351  if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
1352  crm_trace("%s node %s shutting down because connection resource is stopping",
1353  (container? "Guest" : "Remote"), this_node->details->id);
1354  this_node->details->shutdown = TRUE;
1355  }
1356 
1357  /* Now check all the failure conditions. */
1358  if(container && is_set(container->flags, pe_rsc_failed)) {
1359  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1360  this_node->details->id);
1361  this_node->details->online = FALSE;
1362  this_node->details->remote_requires_reset = TRUE;
1363 
1364  } else if(is_set(rsc->flags, pe_rsc_failed)) {
1365  crm_trace("%s node %s OFFLINE because connection resource failed",
1366  (container? "Guest" : "Remote"), this_node->details->id);
1367  this_node->details->online = FALSE;
1368 
1369  } else if (rsc->role == RSC_ROLE_STOPPED
1370  || (container && container->role == RSC_ROLE_STOPPED)) {
1371 
1372  crm_trace("%s node %s OFFLINE because its resource is stopped",
1373  (container? "Guest" : "Remote"), this_node->details->id);
1374  this_node->details->online = FALSE;
1375  this_node->details->remote_requires_reset = FALSE;
1376 
1377  } else if (host && (host->details->online == FALSE)
1378  && host->details->unclean) {
1379  crm_trace("Guest node %s UNCLEAN because host is unclean",
1380  this_node->details->id);
1381  this_node->details->online = FALSE;
1382  this_node->details->remote_requires_reset = TRUE;
1383  }
1384 
1385 remote_online_done:
1386  crm_trace("Remote node %s online=%s",
1387  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1388  return this_node->details->online;
1389 }
1390 
1391 gboolean
1392 determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
1393 {
1394  gboolean online = FALSE;
1395  const char *shutdown = NULL;
1396  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1397 
1398  if (this_node == NULL) {
1399  crm_config_err("No node to check");
1400  return online;
1401  }
1402 
1403  this_node->details->shutdown = FALSE;
1404  this_node->details->expected_up = FALSE;
1405  shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
1406 
1407  if (shutdown != NULL && safe_str_neq("0", shutdown)) {
1408  this_node->details->shutdown = TRUE;
1409 
1410  } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
1411  this_node->details->expected_up = TRUE;
1412  }
1413 
1414  if (this_node->details->type == node_ping) {
1415  this_node->details->unclean = FALSE;
1416  online = FALSE; /* As far as resource management is concerned,
1417  * the node is safely offline.
1418  * Anyone caught abusing this logic will be shot
1419  */
1420 
1421  } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1422  online = determine_online_status_no_fencing(data_set, node_state, this_node);
1423 
1424  } else {
1425  online = determine_online_status_fencing(data_set, node_state, this_node);
1426  }
1427 
1428  if (online) {
1429  this_node->details->online = TRUE;
1430 
1431  } else {
1432  /* remove node from contention */
1433  this_node->fixed = TRUE;
1434  this_node->weight = -INFINITY;
1435  }
1436 
1437  if (online && this_node->details->shutdown) {
1438  /* don't run resources here */
1439  this_node->fixed = TRUE;
1440  this_node->weight = -INFINITY;
1441  }
1442 
1443  if (this_node->details->type == node_ping) {
1444  crm_info("Node %s is not a pacemaker node", this_node->details->uname);
1445 
1446  } else if (this_node->details->unclean) {
1447  pe_proc_warn("Node %s is unclean", this_node->details->uname);
1448 
1449  } else if (this_node->details->online) {
1450  crm_info("Node %s is %s", this_node->details->uname,
1451  this_node->details->shutdown ? "shutting down" :
1452  this_node->details->pending ? "pending" :
1453  this_node->details->standby ? "standby" :
1454  this_node->details->maintenance ? "maintenance" : "online");
1455 
1456  } else {
1457  crm_trace("Node %s is offline", this_node->details->uname);
1458  }
1459 
1460  return online;
1461 }
1462 
/* Return a pointer to the last character of a resource's base name within
 * \p id, treating a trailing ":<digits>" as a clone-instance suffix to be
 * excluded. A colon that is the final character is kept as part of the base
 * name, as is a suffix covering the entire string. Returns NULL if \p id is
 * NULL or empty.
 */
const char *
pe_base_name_end(const char *id)
{
    if (crm_strlen_zero(id)) {
        return NULL;
    }

    const char *last = id + strlen(id) - 1;
    const char *s = last;

    /* Walk backwards over trailing digits (never examining the first char) */
    while ((s > id) && isdigit((unsigned char) *s)) {
        --s;
    }

    if ((s > id) && (*s == ':')) {
        /* ":<digits>" suffix found: the base name ends just before the colon,
         * unless the colon is the string's final character.
         */
        return (s == last)? s : (s - 1);
    }

    /* No instance suffix: the whole string is the base name */
    return last;
}
1500 
/* Return a newly allocated copy of \p last_rsc_id with any clone-instance
 * suffix (":<digits>") removed. The caller is responsible for freeing the
 * result. Aborts (via CRM_ASSERT) on invalid input or allocation failure.
 */
char *
clone_strip(const char *last_rsc_id)
{
    const char *base_end = pe_base_name_end(last_rsc_id);
    char *copy = NULL;

    CRM_ASSERT(base_end != NULL);

    /* base_end points at the last character of the base name, so the copy
     * length is one more than the pointer difference */
    copy = strndup(last_rsc_id, base_end - last_rsc_id + 1);
    CRM_ASSERT(copy != NULL);
    return copy;
}
1522 
/* Return a newly allocated string naming the zeroth instance of the clone
 * whose history ID is \p last_rsc_id: the base name with any existing
 * ":<digits>" suffix replaced by ":0". The caller must free the result.
 * Aborts (via CRM_ASSERT) on invalid input or allocation failure.
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    /* Assert before doing pointer arithmetic: computing end - last_rsc_id
     * with a NULL end (empty or NULL input) would be undefined behavior.
     */
    CRM_ASSERT(end);
    base_name_len = end - last_rsc_id + 1;

    /* base name + ':' + '0' + terminator (calloc zero-fills, giving the NUL) */
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
1548 
/* Build a placeholder (orphan) resource object for a history entry whose
 * configured resource no longer exists, so its state can still be processed.
 * Returns the new resource (added to data_set->resources), or NULL if the
 * history entry cannot be unpacked into a resource.
 */
static resource_t *
create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
    resource_t *rsc = NULL;
    xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);

    copy_in_properties(xml_rsc, rsc_entry);
    crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
    crm_log_xml_debug(xml_rsc, "Orphan resource");

    if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
        return NULL;
    }

    if (xml_contains_remote_node(xml_rsc)) {
        node_t *node;

        /* The orphan was a remote connection resource; keep its node object
         * consistent and schedule the node for shutdown */
        crm_debug("Detected orphaned remote node %s", rsc_id);
        node = pe_find_node(data_set->nodes, rsc_id);
        if (node == NULL) {
            node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
        }
        link_rsc2remotenode(data_set, rsc);

        if (node) {
            crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
            node->details->shutdown = TRUE;
        }
    }

    if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
        /* This orphaned rsc needs to be mapped to a container. */
        crm_trace("Detected orphaned container filler %s", rsc_id);
        /* NOTE(review): a statement here (presumably flagging the resource as
         * a container filler) was lost in extraction -- verify upstream. */
    }
    set_bit(rsc->flags, pe_rsc_orphan);
    data_set->resources = g_list_append(data_set->resources, rsc);
    return rsc;
}
1588 
1593 static pe_resource_t *
1594 create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
1595  pe_node_t *node, pe_working_set_t *data_set)
1596 {
1597  pe_resource_t *top = pe__create_clone_child(parent, data_set);
1598 
1599  // find_rsc() because we might be a cloned group
1600  pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
1601 
1602  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1603  top->id, parent->id, rsc_id, node->details->uname);
1604  return orphan;
1605 }
1606 
/* Map a history entry \p rsc_id on \p node to an instance of the anonymous
 * clone \p parent: prefer an instance already active (or pending) on that
 * node, then an unused inactive instance, and otherwise create a new orphan
 * instance. Always returns a non-NULL resource.
 */
static resource_t *
find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
                     const char *rsc_id)
{
    GListPtr rIter = NULL;
    pe_resource_t *rsc = NULL;
    pe_resource_t *inactive_instance = NULL;
    gboolean skip_inactive = FALSE;

    CRM_ASSERT(parent != NULL);
    CRM_ASSERT(pe_rsc_is_clone(parent));
    CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));

    // Check for active (or partially active, for cloned groups) instance
    pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
    for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
        GListPtr locations = NULL;
        resource_t *child = rIter->data;

        /* Check whether this instance is already known to be active or pending
         * anywhere, at this stage of unpacking. Because this function is called
         * for a resource before the resource's individual operation history
         * entries are unpacked, locations will generally not contain the
         * desired node.
         *
         * However, there are three exceptions:
         * (1) when child is a cloned group and we have already unpacked the
         *     history of another member of the group on the same node;
         * (2) when we've already unpacked the history of another numbered
         *     instance on the same node (which can happen if globally-unique
         *     was flipped from true to false); and
         * (3) when we re-run calculations on the same data set as part of a
         *     simulation.
         */
        child->fns->location(child, &locations, 2);
        if (locations) {
            /* We should never associate the same numbered anonymous clone
             * instance with multiple nodes, and clone instances can't migrate,
             * so there must be only one location, regardless of history.
             */
            CRM_LOG_ASSERT(locations->next == NULL);

            if (((pe_node_t *)locations->data)->details == node->details) {
                /* This child instance is active on the requested node, so check
                 * for a corresponding configured resource. We use find_rsc()
                 * instead of child because child may be a cloned group, and we
                 * need the particular member corresponding to rsc_id.
                 *
                 * If the history entry is orphaned, rsc will be NULL.
                 */
                rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
                if (rsc) {
                    /* If there are multiple instance history entries for an
                     * anonymous clone in a single node's history (which can
                     * happen if globally-unique is switched from true to
                     * false), we want to consider the instances beyond the
                     * first as orphans, even if there are inactive instance
                     * numbers available.
                     */
                    if (rsc->running_on) {
                        crm_notice("Active (now-)anonymous clone %s has "
                                   "multiple (orphan) instance histories on %s",
                                   parent->id, node->details->uname);
                        skip_inactive = TRUE;
                        rsc = NULL;
                    } else {
                        pe_rsc_trace(parent, "Resource %s, active", rsc->id);
                    }
                }
            }
            g_list_free(locations);

        } else {
            pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
            if (!skip_inactive && !inactive_instance
                && is_not_set(child->flags, pe_rsc_block)) {
                // Remember one inactive instance in case we don't find active
                inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
                                                          pe_find_clone);

                /* ... but don't use it if it was already associated with a
                 * pending action on another node
                 */
                if (inactive_instance && inactive_instance->pending_node
                    && (inactive_instance->pending_node->details != node->details)) {
                    inactive_instance = NULL;
                }
            }
        }
    }

    if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
        pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
        rsc = inactive_instance;
    }

    /* If the resource has "requires" set to "quorum" or "nothing", and we don't
     * have a clone instance for every node, we don't want to consume a valid
     * instance number for unclean nodes. Such instances may appear to be active
     * according to the history, but should be considered inactive, so we can
     * start an instance elsewhere. Treat such instances as orphans.
     *
     * An exception is instances running on guest nodes -- since guest node
     * "fencing" is actually just a resource stop, requires shouldn't apply.
     *
     * @TODO Ideally, we'd use an inactive instance number if it is not needed
     * for any clean instances. However, we don't know that at this point.
     */
    if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
        && (!node->details->online || node->details->unclean)
        && !pe__is_guest_node(node)
        && !pe__is_universal_clone(parent, data_set)) {

        rsc = NULL;
    }

    if (rsc == NULL) {
        rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
        pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
    }
    return rsc;
}
1743 
/* Map a resource history entry's ID to the resource object it belongs to,
 * handling clone-instance renaming and anonymous clones. Returns NULL for
 * orphaned history (including history of a resource that is no longer
 * primitive); may record the history ID as the resource's clone_name.
 */
static resource_t *
unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
                     xmlNode * rsc_entry)
{
    resource_t *rsc = NULL;
    resource_t *parent = NULL;

    crm_trace("looking for %s", rsc_id);
    rsc = pe_find_resource(data_set->resources, rsc_id);

    if (rsc == NULL) {
        /* If we didn't find the resource by its name in the operation history,
         * check it again as a clone instance. Even when clone-max=0, we create
         * a single :0 orphan to match against here.
         */
        char *clone0_id = clone_zero(rsc_id);
        resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);

        if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
            rsc = clone0;
            parent = uber_parent(clone0);
            crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
        } else {
            crm_trace("%s is not known as %s either (orphan)",
                      rsc_id, clone0_id);
        }
        free(clone0_id);

    } else if (rsc->variant > pe_native) {
        crm_trace("Resource history for %s is orphaned because it is no longer primitive",
                  rsc_id);
        return NULL;

    } else {
        parent = uber_parent(rsc);
    }

    if (pe_rsc_is_anon_clone(parent)) {

        if (pe_rsc_is_bundled(parent)) {
            /* Bundles map history to a specific replica on this node */
            rsc = pe__find_bundle_replica(parent->parent, node);
        } else {
            char *base = clone_strip(rsc_id);

            rsc = find_anonymous_clone(data_set, node, parent, base);
            free(base);
            CRM_ASSERT(rsc != NULL);
        }
    }

    /* Remember the history ID if it differs from the resource's current name,
     * so later history lookups can match either */
    if (rsc && safe_str_neq(rsc_id, rsc->id)
        && safe_str_neq(rsc_id, rsc->clone_name)) {

        free(rsc->clone_name);
        rsc->clone_name = strdup(rsc_id);
        pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
                     rsc_id, node->details->uname, rsc->id,
                     (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
    }
    return rsc;
}
1805 
/* Create an orphan resource for a history entry with no configuration, and
 * (when stop-orphan-resources is enabled) ban it from running anywhere.
 * Returns the new orphan resource, or NULL if it could not be created.
 */
static resource_t *
process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
{
    resource_t *rsc = NULL;
    const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);

    crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
    rsc = create_fake_resource(rsc_id, rsc_entry, data_set);

    if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
        /* NOTE(review): the body of this branch was lost in extraction --
         * verify against the upstream source. */

    } else {
        print_resource(LOG_TRACE, "Added orphan", rsc, FALSE);

        CRM_CHECK(rsc != NULL, return NULL);
        /* Orphans must not run anywhere when stop-orphan-resources is set */
        resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
    }
    return rsc;
}
1826 
1827 static void
1828 process_rsc_state(resource_t * rsc, node_t * node,
1829  enum action_fail_response on_fail,
1830  xmlNode * migrate_op, pe_working_set_t * data_set)
1831 {
1832  node_t *tmpnode = NULL;
1833  char *reason = NULL;
1834 
1835  CRM_ASSERT(rsc);
1836  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
1837  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
1838 
1839  /* process current state */
1840  if (rsc->role != RSC_ROLE_UNKNOWN) {
1841  resource_t *iter = rsc;
1842 
1843  while (iter) {
1844  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
1845  node_t *n = node_copy(node);
1846 
1847  pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
1848  n->details->uname);
1849  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
1850  }
1851  if (is_set(iter->flags, pe_rsc_unique)) {
1852  break;
1853  }
1854  iter = iter->parent;
1855  }
1856  }
1857 
1858  /* If a managed resource is believed to be running, but node is down ... */
1859  if (rsc->role > RSC_ROLE_STOPPED
1860  && node->details->online == FALSE
1861  && node->details->maintenance == FALSE
1862  && is_set(rsc->flags, pe_rsc_managed)) {
1863 
1864  gboolean should_fence = FALSE;
1865 
1866  /* If this is a guest node, fence it (regardless of whether fencing is
1867  * enabled, because guest node fencing is done by recovery of the
1868  * container resource rather than by the fencer). Mark the resource
1869  * we're processing as failed. When the guest comes back up, its
1870  * operation history in the CIB will be cleared, freeing the affected
1871  * resource to run again once we are sure we know its state.
1872  */
1873  if (pe__is_guest_node(node)) {
1874  set_bit(rsc->flags, pe_rsc_failed);
1875  should_fence = TRUE;
1876 
1877  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1878  if (pe__is_remote_node(node) && node->details->remote_rsc
1879  && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
1880 
1881  /* Setting unseen means that fencing of the remote node will
1882  * occur only if the connection resource is not going to start
1883  * somewhere. This allows connection resources on a failed
1884  * cluster node to move to another node without requiring the
1885  * remote nodes to be fenced as well.
1886  */
1887  node->details->unseen = TRUE;
1888  reason = crm_strdup_printf("%s is active there (fencing will be"
1889  " revoked if remote connection can "
1890  "be re-established elsewhere)",
1891  rsc->id);
1892  }
1893  should_fence = TRUE;
1894  }
1895 
1896  if (should_fence) {
1897  if (reason == NULL) {
1898  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
1899  }
1900  pe_fence_node(data_set, node, reason);
1901  }
1902  free(reason);
1903  }
1904 
1905  if (node->details->unclean) {
1906  /* No extra processing needed
1907  * Also allows resources to be started again after a node is shot
1908  */
1909  on_fail = action_fail_ignore;
1910  }
1911 
1912  switch (on_fail) {
1913  case action_fail_ignore:
1914  /* nothing to do */
1915  break;
1916 
1917  case action_fail_fence:
1918  /* treat it as if it is still running
1919  * but also mark the node as unclean
1920  */
1921  reason = crm_strdup_printf("%s failed there", rsc->id);
1922  pe_fence_node(data_set, node, reason);
1923  free(reason);
1924  break;
1925 
1926  case action_fail_standby:
1927  node->details->standby = TRUE;
1928  node->details->standby_onfail = TRUE;
1929  break;
1930 
1931  case action_fail_block:
1932  /* is_managed == FALSE will prevent any
1933  * actions being sent for the resource
1934  */
1936  set_bit(rsc->flags, pe_rsc_block);
1937  break;
1938 
1939  case action_fail_migrate:
1940  /* make sure it comes up somewhere else
1941  * or not at all
1942  */
1943  resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
1944  break;
1945 
1946  case action_fail_stop:
1947  rsc->next_role = RSC_ROLE_STOPPED;
1948  break;
1949 
1950  case action_fail_recover:
1951  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1952  set_bit(rsc->flags, pe_rsc_failed);
1953  stop_action(rsc, node, FALSE);
1954  }
1955  break;
1956 
1958  set_bit(rsc->flags, pe_rsc_failed);
1959 
1960  if (rsc->container && pe_rsc_is_bundled(rsc)) {
1961  /* A bundle's remote connection can run on a different node than
1962  * the bundle's container. We don't necessarily know where the
1963  * container is running yet, so remember it and add a stop
1964  * action for it later.
1965  */
1966  data_set->stop_needed = g_list_prepend(data_set->stop_needed,
1967  rsc->container);
1968  } else if (rsc->container) {
1969  stop_action(rsc->container, node, FALSE);
1970  } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1971  stop_action(rsc, node, FALSE);
1972  }
1973  break;
1974 
1976  set_bit(rsc->flags, pe_rsc_failed);
1977  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1978  tmpnode = NULL;
1979  if (rsc->is_remote_node) {
1980  tmpnode = pe_find_node(data_set->nodes, rsc->id);
1981  }
1982  if (tmpnode &&
1983  pe__is_remote_node(tmpnode) &&
1984  tmpnode->details->remote_was_fenced == 0) {
1985 
1986  /* The remote connection resource failed in a way that
1987  * should result in fencing the remote node.
1988  */
1989  pe_fence_node(data_set, tmpnode,
1990  "remote connection is unrecoverable");
1991  }
1992  }
1993 
1994  /* require the stop action regardless if fencing is occurring or not. */
1995  if (rsc->role > RSC_ROLE_STOPPED) {
1996  stop_action(rsc, node, FALSE);
1997  }
1998 
1999  /* if reconnect delay is in use, prevent the connection from exiting the
2000  * "STOPPED" role until the failure is cleared by the delay timeout. */
2001  if (rsc->remote_reconnect_ms) {
2002  rsc->next_role = RSC_ROLE_STOPPED;
2003  }
2004  break;
2005  }
2006 
2007  /* ensure a remote-node connection failure forces an unclean remote-node
2008  * to be fenced. By setting unseen = FALSE, the remote-node failure will
2009  * result in a fencing operation regardless if we're going to attempt to
2010  * reconnect to the remote-node in this transition or not. */
2011  if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
2012  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2013  if (tmpnode && tmpnode->details->unclean) {
2014  tmpnode->details->unseen = FALSE;
2015  }
2016  }
2017 
2018  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2019  if (is_set(rsc->flags, pe_rsc_orphan)) {
2020  if (is_set(rsc->flags, pe_rsc_managed)) {
2021  crm_config_warn("Detected active orphan %s running on %s",
2022  rsc->id, node->details->uname);
2023  } else {
2024  crm_config_warn("Cluster configured not to stop active orphans."
2025  " %s must be stopped manually on %s",
2026  rsc->id, node->details->uname);
2027  }
2028  }
2029 
2030  native_add_running(rsc, node, data_set);
2031  if (on_fail != action_fail_ignore) {
2032  set_bit(rsc->flags, pe_rsc_failed);
2033  }
2034 
2035  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2036  /* Only do this for older status sections that included instance numbers
2037  * Otherwise stopped instances will appear as orphans
2038  */
2039  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2040  free(rsc->clone_name);
2041  rsc->clone_name = NULL;
2042 
2043  } else {
2044  GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP,
2045  FALSE);
2046  GListPtr gIter = possible_matches;
2047 
2048  for (; gIter != NULL; gIter = gIter->next) {
2049  action_t *stop = (action_t *) gIter->data;
2050 
2051  stop->flags |= pe_action_optional;
2052  }
2053 
2054  g_list_free(possible_matches);
2055  }
2056 }
2057 
2058 /* create active recurring operations as optional */
2059 static void
2060 process_recurring(node_t * node, resource_t * rsc,
2061  int start_index, int stop_index,
2062  GListPtr sorted_op_list, pe_working_set_t * data_set)
2063 {
2064  int counter = -1;
2065  const char *task = NULL;
2066  const char *status = NULL;
2067  GListPtr gIter = sorted_op_list;
2068 
2069  CRM_ASSERT(rsc);
2070  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2071 
2072  for (; gIter != NULL; gIter = gIter->next) {
2073  xmlNode *rsc_op = (xmlNode *) gIter->data;
2074 
2075  guint interval_ms = 0;
2076  char *key = NULL;
2077  const char *id = ID(rsc_op);
2078  const char *interval_ms_s = NULL;
2079 
2080  counter++;
2081 
2082  if (node->details->online == FALSE) {
2083  pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
2084  break;
2085 
2086  /* Need to check if there's a monitor for role="Stopped" */
2087  } else if (start_index < stop_index && counter <= stop_index) {
2088  pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
2089  continue;
2090 
2091  } else if (counter < start_index) {
2092  pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
2093  continue;
2094  }
2095 
2096  interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
2097  interval_ms = crm_parse_ms(interval_ms_s);
2098  if (interval_ms == 0) {
2099  pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
2100  continue;
2101  }
2102 
2103  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2104  if (safe_str_eq(status, "-1")) {
2105  pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
2106  continue;
2107  }
2108  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2109  /* create the action */
2110  key = generate_op_key(rsc->id, task, interval_ms);
2111  pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
2112  custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
2113  }
2114 }
2115 
/*!
 * \brief Locate the bounds of the current activation in an operation history
 *
 * Given a resource's operation history sorted by call ID, determine the index
 * of the most recent successful stop and of the most recent start (or of an
 * event that implies the resource was started).
 *
 * \param[in]  sorted_op_list  Resource's history entries, sorted by call ID
 * \param[out] start_index     Set to index of last start (or equivalent
 *                             evidence of starting), or -1 if none found
 * \param[out] stop_index      Set to index of last successful stop, or -1 if
 *                             none found
 */
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
    int counter = -1;
    int implied_monitor_start = -1;
    int implied_clone_start = -1;
    const char *task = NULL;
    const char *status = NULL;
    GListPtr gIter = sorted_op_list;

    *stop_index = -1;
    *start_index = -1;

    for (; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;

        counter++;

        task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
        status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);

        if (safe_str_eq(task, CRMD_ACTION_STOP)
            && safe_str_eq(status, "0")) {
            /* Remember only stops whose execution status is "0" (completed) */
            *stop_index = counter;

        } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
            /* A start or a migrate_from explicitly marks the resource active */
            *start_index = counter;

        } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
            const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);

            /* A monitor after the last stop that returned rc 0 or 8
             * (presumably "running"/"running master" -- verify against the
             * OCF exit codes) implies the resource was started */
            if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
                implied_monitor_start = counter;
            }
        } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
            /* A promote or demote implies the resource had been started */
            implied_clone_start = counter;
        }
    }

    if (*start_index == -1) {
        /* No explicit start recorded; fall back to implied evidence,
         * preferring promote/demote over monitor results */
        if (implied_clone_start != -1) {
            *start_index = implied_clone_start;
        } else if (implied_monitor_start != -1) {
            *start_index = implied_monitor_start;
        }
    }
}
2163 
2164 static resource_t *
2165 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
2166 {
2167  GListPtr gIter = NULL;
2168  int stop_index = -1;
2169  int start_index = -1;
2170  enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
2171 
2172  const char *task = NULL;
2173  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2174 
2175  resource_t *rsc = NULL;
2176  GListPtr op_list = NULL;
2177  GListPtr sorted_op_list = NULL;
2178 
2179  xmlNode *migrate_op = NULL;
2180  xmlNode *rsc_op = NULL;
2181  xmlNode *last_failure = NULL;
2182 
2183  enum action_fail_response on_fail = FALSE;
2184  enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
2185 
2186  crm_trace("[%s] Processing %s on %s",
2187  crm_element_name(rsc_entry), rsc_id, node->details->uname);
2188 
2189  /* extract operations */
2190  op_list = NULL;
2191  sorted_op_list = NULL;
2192 
2193  for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
2194  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
2195  op_list = g_list_prepend(op_list, rsc_op);
2196  }
2197  }
2198 
2199  if (op_list == NULL) {
2200  /* if there are no operations, there is nothing to do */
2201  return NULL;
2202  }
2203 
2204  /* find the resource */
2205  rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2206  if (rsc == NULL) {
2207  rsc = process_orphan_resource(rsc_entry, node, data_set);
2208  }
2209  CRM_ASSERT(rsc != NULL);
2210 
2211  /* process operations */
2212  saved_role = rsc->role;
2213  on_fail = action_fail_ignore;
2214  rsc->role = RSC_ROLE_UNKNOWN;
2215  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2216 
2217  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2218  xmlNode *rsc_op = (xmlNode *) gIter->data;
2219 
2220  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2221  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2222  migrate_op = rsc_op;
2223  }
2224 
2225  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2226  }
2227 
2228  /* create active recurring operations as optional */
2229  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2230  process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2231 
2232  /* no need to free the contents */
2233  g_list_free(sorted_op_list);
2234 
2235  process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2236 
2237  if (get_target_role(rsc, &req_role)) {
2238  if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
2239  pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
2240  " with requested next role %s",
2241  rsc->id, role2text(rsc->next_role), role2text(req_role));
2242  rsc->next_role = req_role;
2243 
2244  } else if (req_role > rsc->next_role) {
2245  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2246  " with requested next role %s",
2247  rsc->id, role2text(rsc->next_role), role2text(req_role));
2248  }
2249  }
2250 
2251  if (saved_role > rsc->role) {
2252  rsc->role = saved_role;
2253  }
2254 
2255  return rsc;
2256 }
2257 
2258 static void
2259 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2260 {
2261  xmlNode *rsc_entry = NULL;
2262  for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
2263  rsc_entry = __xml_next_element(rsc_entry)) {
2264 
2265  resource_t *rsc;
2266  resource_t *container;
2267  const char *rsc_id;
2268  const char *container_id;
2269 
2270  if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
2271  continue;
2272  }
2273 
2274  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2275  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2276  if (container_id == NULL || rsc_id == NULL) {
2277  continue;
2278  }
2279 
2280  container = pe_find_resource(data_set->resources, container_id);
2281  if (container == NULL) {
2282  continue;
2283  }
2284 
2285  rsc = pe_find_resource(data_set->resources, rsc_id);
2286  if (rsc == NULL ||
2287  is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
2288  rsc->container != NULL) {
2289  continue;
2290  }
2291 
2292  pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2293  rsc->id, container_id);
2294  rsc->container = container;
2295  container->fillers = g_list_append(container->fillers, rsc);
2296  }
2297 }
2298 
2299 gboolean
2300 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2301 {
2302  xmlNode *rsc_entry = NULL;
2303  gboolean found_orphaned_container_filler = FALSE;
2304 
2305  CRM_CHECK(node != NULL, return FALSE);
2306 
2307  crm_trace("Unpacking resources on %s", node->details->uname);
2308 
2309  for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
2310  rsc_entry = __xml_next_element(rsc_entry)) {
2311 
2312  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
2313  resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2314  if (!rsc) {
2315  continue;
2316  }
2317  if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
2318  found_orphaned_container_filler = TRUE;
2319  }
2320  }
2321  }
2322 
2323  /* now that all the resource state has been unpacked for this node
2324  * we have to go back and map any orphaned container fillers to their
2325  * container resource */
2326  if (found_orphaned_container_filler) {
2327  handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2328  }
2329  return TRUE;
2330 }
2331 
2332 static void
2333 set_active(resource_t * rsc)
2334 {
2335  resource_t *top = uber_parent(rsc);
2336 
2337  if (top && is_set(top->flags, pe_rsc_promotable)) {
2338  rsc->role = RSC_ROLE_SLAVE;
2339  } else {
2340  rsc->role = RSC_ROLE_STARTED;
2341  }
2342 }
2343 
2344 static void
2345 set_node_score(gpointer key, gpointer value, gpointer user_data)
2346 {
2347  node_t *node = value;
2348  int *score = user_data;
2349 
2350  node->weight = *score;
2351 }
2352 
2353 #define STATUS_PATH_MAX 1024
2354 static xmlNode *
2355 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2356  pe_working_set_t * data_set)
2357 {
2358  int offset = 0;
2359  char xpath[STATUS_PATH_MAX];
2360 
2361  offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
2362  offset +=
2363  snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
2364  resource);
2365 
2366  /* Need to check against transition_magic too? */
2367  if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
2368  offset +=
2369  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2370  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
2371  source);
2372  } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
2373  offset +=
2374  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2375  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
2376  source);
2377  } else {
2378  offset +=
2379  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2380  "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
2381  }
2382 
2383  CRM_LOG_ASSERT(offset > 0);
2384  return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
2385 }
2386 
2387 static bool
2388 stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2389  pe_working_set_t *data_set)
2390 {
2391  xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id,
2392  NULL, data_set);
2393 
2394  if (stop_op) {
2395  int stop_id = 0;
2396  int task_id = 0;
2397 
2398  crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
2399  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
2400  if (stop_id > task_id) {
2401  return TRUE;
2402  }
2403  }
2404  return FALSE;
2405 }
2406 
/*!
 * \internal
 * \brief Update resource and node state after a successful migrate_to event
 *
 * \param[in] rsc       Resource that was migrated
 * \param[in] node      Node the migrate_to was recorded on (the source)
 * \param[in] xml_op    History entry for the migrate_to operation
 * \param[in] data_set  Cluster working set
 */
static void
unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
    /* A successful migration sequence is:
     * migrate_to on source node
     * migrate_from on target node
     * stop on source node
     *
     * If a migrate_to is followed by a stop, the entire migration (successful
     * or failed) is complete, and we don't care what happened on the target.
     *
     * If no migrate_from has happened, the migration is considered to be
     * "partial". If the migrate_from failed, make sure the resource gets
     * stopped on both source and target (if up).
     *
     * If the migrate_to and migrate_from both succeeded (which also implies the
     * resource is no longer running on the source), but there is no stop, the
     * migration is considered to be "dangling".
     */
    int from_rc = 0;
    int from_status = 0;
    const char *migrate_source = NULL;
    const char *migrate_target = NULL;
    pe_node_t *target = NULL;
    pe_node_t *source = NULL;
    xmlNode *migrate_from = NULL;

    if (stop_happened_after(rsc, node, xml_op, data_set)) {
        /* A later stop completed the migration; nothing more to deduce */
        return;
    }

    // Clones are not allowed to migrate, so role can't be master
    rsc->role = RSC_ROLE_STARTED;

    migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
    migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);

    target = pe_find_node(data_set->nodes, migrate_target);
    source = pe_find_node(data_set->nodes, migrate_source);

    // Check whether there was a migrate_from action
    migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target,
                               migrate_source, data_set);
    if (migrate_from) {
        crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
        crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
        pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
                     ID(migrate_from), migrate_target, from_status, from_rc);
    }

    if (migrate_from && from_rc == PCMK_OCF_OK
        && from_status == PCMK_LRM_OP_DONE) {
        /* The migrate_to and migrate_from both succeeded, so mark the migration
         * as "dangling". This will be used to schedule a stop action on the
         * source without affecting the target.
         */
        pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
                     migrate_source);
        rsc->role = RSC_ROLE_STOPPED;
        rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);

    } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
        if (target && target->details->online) {
            pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
                         target->details->online);
            native_add_running(rsc, target, data_set);
        }

    } else { // Pending, or complete but erased
        if (target && target->details->online) {
            pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
                         target->details->online);

            native_add_running(rsc, target, data_set);
            if (source && source->details->online) {
                /* This is a partial migration: the migrate_to completed
                 * successfully on the source, but the migrate_from has not
                 * completed. Remember the source and target; if the newly
                 * chosen target remains the same when we schedule actions
                 * later, we may continue with the migration.
                 */
                rsc->partial_migration_target = target;
                rsc->partial_migration_source = source;
            }
        } else {
            /* Consider it failed here - forces a restart, prevents migration */
            set_bit(rsc->flags, pe_rsc_failed);
            /* NOTE(review): a line appears to have been lost from this copy
             * here (upstream also disables migration for the resource at this
             * point) -- verify against the upstream source */
        }
    }
}
2498 
/*!
 * \internal
 * \brief Update resource state after a failed migrate_to or migrate_from
 *
 * Work out where the resource must still be active (source and/or target),
 * based on which of the migration's operations are recorded in the history
 * and their relative call IDs.
 *
 * \param[in] rsc       Resource involved in the failed migration
 * \param[in] node      Node the failed operation was recorded on
 * \param[in] xml_op    History entry for the failed migration operation
 * \param[in] data_set  Cluster working set
 */
static void
unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);

    CRM_ASSERT(rsc);
    if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
        /* The migrate_from (on the target) failed */
        int stop_id = 0;
        int migrate_id = 0;
        const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
        const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);

        xmlNode *stop_op =
            find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
        xmlNode *migrate_op =
            find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
                        data_set);

        if (stop_op) {
            crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
        }
        if (migrate_op) {
            crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
        }

        /* Get our state right */
        rsc->role = RSC_ROLE_STARTED;   /* can be master? */

        /* If the source has not been stopped since the migrate_to, the
         * resource is still active there */
        if (stop_op == NULL || stop_id < migrate_id) {
            node_t *source = pe_find_node(data_set->nodes, migrate_source);

            if (source && source->details->online) {
                native_add_running(rsc, source, data_set);
            }
        }

    } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
        /* The migrate_to (on the source) failed */
        int stop_id = 0;
        int migrate_id = 0;
        const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
        const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);

        xmlNode *stop_op =
            find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
        xmlNode *migrate_op =
            find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
                        data_set);

        if (stop_op) {
            crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
        }
        if (migrate_op) {
            crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
        }

        /* Get our state right */
        rsc->role = RSC_ROLE_STARTED;   /* can be master? */

        /* If the target has not been stopped since the migrate_from, the
         * resource may still be active there */
        if (stop_op == NULL || stop_id < migrate_id) {
            node_t *target = pe_find_node(data_set->nodes, migrate_target);

            pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
                         migrate_id);
            if (target && target->details->online) {
                native_add_running(rsc, target, data_set);
            }

        } else if (migrate_op == NULL) {
            /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */
            rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
        }
    }
}
2572 
2573 static void
2574 record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
2575 {
2576  xmlNode *xIter = NULL;
2577  const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
2578 
2579  if (node->details->online == FALSE) {
2580  return;
2581  }
2582 
2583  for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
2584  const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
2585  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
2586 
2587  if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
2588  crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
2589  return;
2590  }
2591  }
2592 
2593  crm_trace("Adding entry %s on %s", op_key, node->details->uname);
2594  crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
2595  crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
2596  add_node_copy(data_set->failed, op);
2597 }
2598 
2599 static const char *get_op_key(xmlNode *xml_op)
2600 {
2601  const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
2602  if(key == NULL) {
2603  key = ID(xml_op);
2604  }
2605  return key;
2606 }
2607 
/*!
 * \internal
 * \brief Process a failed operation from a resource's history
 *
 * Record the failure, escalate the failure handling in effect if this
 * operation's on-fail setting is more severe, and update the resource's
 * current and next role accordingly.
 *
 * \param[in]     rsc           Resource the failed operation belongs to
 * \param[in]     node          Node the operation was executed on
 * \param[in]     rc            Return code of the failed operation
 * \param[in]     xml_op        Operation history entry
 * \param[out]    last_failure  Set to \p xml_op
 * \param[in,out] on_fail       Failure handling in effect so far; raised if
 *                              this failure requires a more severe response
 * \param[in]     data_set      Cluster working set
 */
static void
unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
                      enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
    guint interval_ms = 0;
    bool is_probe = FALSE;
    action_t *action = NULL;

    const char *key = get_op_key(xml_op);
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);

    CRM_ASSERT(rsc);

    *last_failure = xml_op;

    /* A zero-interval monitor is a probe */
    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
    if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
        is_probe = TRUE;
        pe_rsc_trace(rsc, "is a probe: %s", key);
    }

    /* "Not installed" on an asymmetric cluster is only logged at trace level;
     * everything else is logged loudly and recorded as a failed op */
    if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
        crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
                 (is_probe? "probe" : task), rsc->id, node->details->uname,
                 services_ocf_exitcode_str(rc), rc);

        if (is_probe && (rc != PCMK_OCF_OK)
            && (rc != PCMK_OCF_NOT_RUNNING)
            && (rc != PCMK_OCF_RUNNING_MASTER)) {

            /* A failed (not just unexpected) probe result could mean the user
             * didn't know resources will be probed even where they can't run.
             */
            crm_notice("If it is not possible for %s to run on %s, see "
                       "the resource-discovery option for location constraints",
                       rsc->id, node->details->uname);
        }

        record_failed_op(xml_op, node, rsc, data_set);

    } else {
        crm_trace("Processing failed op %s for %s on %s: %s (%d)",
                  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
                  rc);
    }

    /* Create a pseudo-action to learn the configured on-fail and fail-role
     * for this operation, then escalate *on_fail if this one is more severe */
    action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
    if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
        (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
        (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
        (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
        pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
                     fail2text(action->on_fail), action->uuid, key);
        *on_fail = action->on_fail;
    }

    if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        /* A failed stop bans the resource from this node */
        resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);

    } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
        unpack_rsc_migration_failure(rsc, node, xml_op, data_set);

    } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
        /* Even a failed promote means the resource is (at least trying to be)
         * in the master role */
        rsc->role = RSC_ROLE_MASTER;

    } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        if (action->on_fail == action_fail_block) {
            rsc->role = RSC_ROLE_MASTER;
            rsc->next_role = RSC_ROLE_STOPPED;

        } else if(rc == PCMK_OCF_NOT_RUNNING) {
            rsc->role = RSC_ROLE_STOPPED;

        } else {
            /*
             * Staying in master role would put the PE/TE into a loop. Setting
             * slave role is not dangerous because the resource will be stopped
             * as part of recovery, and any master promotion will be ordered
             * after that stop.
             */
            rsc->role = RSC_ROLE_SLAVE;
        }
    }

    if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
        /* leave stopped */
        pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
        rsc->role = RSC_ROLE_STOPPED;

    } else if (rsc->role < RSC_ROLE_STARTED) {
        pe_rsc_trace(rsc, "Setting %s active", rsc->id);
        set_active(rsc);
    }

    pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
                 rsc->id, role2text(rsc->role),
                 node->details->unclean ? "true" : "false",
                 fail2text(action->on_fail), role2text(action->fail_role));

    /* Apply any configured fail-role that is stricter than the calculated
     * next role */
    if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
        rsc->next_role = action->fail_role;
    }

    if (action->fail_role == RSC_ROLE_STOPPED) {
        int score = -INFINITY;

        resource_t *fail_rsc = rsc;

        if (fail_rsc->parent) {
            resource_t *parent = uber_parent(fail_rsc);

            if (pe_rsc_is_clone(parent)
                && is_not_set(parent->flags, pe_rsc_unique)) {
                /* For clone resources, if a child fails on an operation
                 * with on-fail = stop, all the resources fail. Do this by preventing
                 * the parent from coming up again. */
                fail_rsc = parent;
            }
        }
        crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
        /* make sure it doesn't come up again */
        if (fail_rsc->allowed_nodes != NULL) {
            g_hash_table_destroy(fail_rsc->allowed_nodes);
        }
        fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
        g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
    }

    pe_free_action(action);
}
2738 
2758 static int
2759 determine_op_status(
2760  resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
2761 {
2762  guint interval_ms = 0;
2763  int result = PCMK_LRM_OP_DONE;
2764 
2765  const char *key = get_op_key(xml_op);
2766  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2767 
2768  bool is_probe = FALSE;
2769 
2770  CRM_ASSERT(rsc);
2771 
2772  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2773  if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2774  is_probe = TRUE;
2775  }
2776 
2777  if (target_rc < 0) {
2778  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
2779  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
2780  * target_rc in the transition key, which (along with the similar case
2781  * of a corrupted transition key in the CIB) will be reported to this
2782  * function as -1. Pacemaker 2.0+ does not support rolling upgrades from
2783  * those versions or processing of saved CIB files from those versions,
2784  * so we do not need to care much about this case.
2785  */
2786  result = PCMK_LRM_OP_ERROR;
2787  crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
2788  key, node->details->uname);
2789 
2790  } else if (target_rc != rc) {
2791  result = PCMK_LRM_OP_ERROR;
2792  pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
2793  key, node->details->uname,
2794  services_ocf_exitcode_str(rc), rc,
2795  services_ocf_exitcode_str(target_rc), target_rc);
2796  }
2797 
2798  switch (rc) {
2799  case PCMK_OCF_OK:
2800  // @TODO Should this be (rc != target_rc)?
2801  if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
2802  result = PCMK_LRM_OP_DONE;
2803  pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
2804  task, rsc->id, node->details->uname);
2805  }
2806  break;
2807 
2808  case PCMK_OCF_NOT_RUNNING:
2809  if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
2810  result = PCMK_LRM_OP_DONE;
2811  rsc->role = RSC_ROLE_STOPPED;
2812 
2813  /* clear any previous failure actions */
2814  *on_fail = action_fail_ignore;
2815  rsc->next_role = RSC_ROLE_UNKNOWN;
2816  }
2817  break;
2818 
2820  if (is_probe && (rc != target_rc)) {
2821  result = PCMK_LRM_OP_DONE;
2822  pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
2823  task, rsc->id, node->details->uname);
2824  }
2825  rsc->role = RSC_ROLE_MASTER;
2826  break;
2827 
2830  rsc->role = RSC_ROLE_MASTER;
2831  result = PCMK_LRM_OP_ERROR;
2832  break;
2833 
2835  result = PCMK_LRM_OP_ERROR_FATAL;
2836  break;
2837 
2839  if (interval_ms > 0) {
2840  result = PCMK_LRM_OP_NOTSUPPORTED;
2841  break;
2842  }
2843  // fall through
2847  if (!pe_can_fence(data_set, node)
2848  && safe_str_eq(task, CRMD_ACTION_STOP)) {
2849  /* If a stop fails and we can't fence, there's nothing else we can do */
2850  pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
2851  rsc->id, task, services_ocf_exitcode_str(rc), rc);
2853  set_bit(rsc->flags, pe_rsc_block);
2854  }
2855  result = PCMK_LRM_OP_ERROR_HARD;
2856  break;
2857 
2858  default:
2859  if (result == PCMK_LRM_OP_DONE) {
2860  crm_info("Treating unknown return code %d for %s on %s as failure",
2861  rc, key, node->details->uname);
2862  result = PCMK_LRM_OP_ERROR;
2863  }
2864  break;
2865  }
2866  return result;
2867 }
2868 
/*!
 * \internal
 * \brief Check whether a recorded operation result has outlived failure-timeout
 *
 * \param[in] rsc       Resource the operation history entry belongs to
 * \param[in] node      Node the operation ran on
 * \param[in] rc        Return code recorded in the history entry
 * \param[in] xml_op    Operation history XML entry
 * \param[in] data_set  Cluster working set
 *
 * \return TRUE if the entry is expired (its failure should be disregarded),
 *         FALSE otherwise
 *
 * \note As a side effect, this may schedule a fail-count clearing action via
 *       pe__clear_failcount(), ordered after any scheduled fencing when the
 *       resource is a remote connection with a reconnect interval.
 */
static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
{
    bool expired = FALSE;
    time_t last_failure = 0;
    guint interval_ms = 0;
    int failure_timeout = rsc->failure_timeout;
    const char *key = get_op_key(xml_op);
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *clear_reason = NULL;

    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);

    /* clearing recurring monitor operation failures automatically
     * needs to be carefully considered */
    if ((interval_ms != 0) && safe_str_eq(task, "monitor")) {

        /* TODO, in the future we should consider not clearing recurring monitor
         * op failures unless the last action for a resource was a "stop" action.
         * otherwise it is possible that clearing the monitor failure will result
         * in the resource being in an undeterministic state.
         *
         * For now we handle this potential undeterministic condition for remote
         * node connection resources by not clearing a recurring monitor op failure
         * until after the node has been fenced. */

        if (is_set(data_set->flags, pe_flag_stonith_enabled)
            && rsc->remote_reconnect_ms) {

            node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
            if (remote_node && remote_node->details->remote_was_fenced == 0) {
                if (strstr(ID(xml_op), "last_failure")) {
                    crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
                }
                /* disabling failure timeout for this operation because we believe
                 * fencing of the remote node should occur first. */
                failure_timeout = 0;
            }
        }
    }

    if (failure_timeout > 0) {
        int last_run = 0;

        if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
            time_t now = get_effective_time(data_set);

            /* The entry is stale once failure-timeout has elapsed since its
             * last recorded change */
            if (now > (last_run + failure_timeout)) {
                expired = TRUE;
            }
        }
    }

    if (expired) {
        if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op,
                             data_set)) {

            // There is a fail count ignoring timeout

            if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
                                 xml_op, data_set) == 0) {
                // There is no fail count considering timeout
                clear_reason = "it expired";

            } else {
                /* Some failures are still inside the timeout window, so this
                 * entry cannot be treated as expired after all */
                expired = FALSE;
            }

        } else if (rsc->remote_reconnect_ms
                   && strstr(ID(xml_op), "last_failure")) {
            // Always clear last failure when reconnect interval is set
            clear_reason = "reconnect interval is set";
        }

    } else if (strstr(ID(xml_op), "last_failure") &&
               ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {

        if (pe__bundle_needs_remote_name(rsc)) {
            /* We haven't allocated resources yet, so we can't reliably
             * substitute addr parameters for the REMOTE_CONTAINER_HACK.
             * When that's needed, defer the check until later.
             */
            pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
                                data_set);

        } else {
            op_digest_cache_t *digest_data = NULL;

            /* Compare the failure's recorded parameter digest against the
             * current configuration; a mismatch means the failure is stale */
            digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
            switch (digest_data->rc) {
                case RSC_DIGEST_UNKNOWN:
                    crm_trace("Resource %s history entry %s on %s has no digest to compare",
                              rsc->id, key, node->details->id);
                    break;
                case RSC_DIGEST_MATCH:
                    break;
                default:
                    clear_reason = "resource parameters have changed";
                    break;
            }
        }
    }

    if (clear_reason != NULL) {
        // Schedule clearing of the fail count
        pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
                                                    data_set);

        if (is_set(data_set->flags, pe_flag_stonith_enabled)
            && rsc->remote_reconnect_ms) {

            pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);

            if (remote_node) {
                /* If we're clearing a remote connection due to a reconnect
                 * interval, we want to wait until any scheduled fencing
                 * completes.
                 *
                 * We could limit this to remote_node->details->unclean, but at
                 * this point, that's always true (it won't be reliable until
                 * after unpack_node_loop() is done).
                 */
                pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
                                                 data_set);

                crm_info("Clearing %s failure will wait until any scheduled "
                         "fencing of %s completes", task, rsc->id);
                order_actions(fence, clear_op, pe_order_implies_then);
            }
        }
    }

    if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
        switch(rc) {
            case PCMK_OCF_OK:
            case PCMK_OCF_NOT_RUNNING:
            /* NOTE(review): additional case labels (likely
             * PCMK_OCF_RUNNING_MASTER and PCMK_OCF_DEGRADED_MASTER) appear to
             * have been lost from this listing -- confirm against upstream */
            case PCMK_OCF_DEGRADED:
                /* Don't expire probes that return these values */
                expired = FALSE;
                break;
        }
    }

    return expired;
}
3015 
3016 int get_target_rc(xmlNode *xml_op)
3017 {
3018  int target_rc = 0;
3019  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
3020 
3021  if (key == NULL) {
3022  return -1;
3023  }
3024  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
3025  return target_rc;
3026 }
3027 
3028 static enum action_fail_response
3029 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
3030 {
3031  int result = action_fail_recover;
3032  action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
3033 
3034  result = action->on_fail;
3035  pe_free_action(action);
3036 
3037  return result;
3038 }
3039 
/*!
 * \internal
 * \brief Update a resource's role (and failure bookkeeping) from one completed action
 *
 * \param[in]     rsc           Resource the history entry is for
 * \param[in]     node          Node the action ran on
 * \param[in]     xml_op        Operation history XML entry
 * \param[in]     task          Action name recorded in the entry
 * \param[in]     rc            Return code recorded for the action
 * \param[in]     last_failure  Most recent failure entry seen for the resource, if any
 * \param[in,out] on_fail       Failure handling currently in effect; may be reset
 *                              to action_fail_ignore when a clean result supersedes
 *                              the earlier failure
 * \param[in]     data_set      Cluster working set
 */
static void
update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
                      xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
    gboolean clear_past_failure = FALSE;

    CRM_ASSERT(rsc);
    CRM_ASSERT(xml_op);

    if (rc == PCMK_OCF_NOT_RUNNING) {
        clear_past_failure = TRUE;

    } else if (rc == PCMK_OCF_NOT_INSTALLED) {
        rsc->role = RSC_ROLE_STOPPED;

    } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
        if (last_failure) {
            const char *op_key = get_op_key(xml_op);
            const char *last_failure_key = get_op_key(last_failure);

            /* A successful monitor matching the last recorded failure means
             * that failure has been superseded */
            if (safe_str_eq(op_key, last_failure_key)) {
                clear_past_failure = TRUE;
            }
        }

        if (rsc->role < RSC_ROLE_STARTED) {
            set_active(rsc);
        }

    } else if (safe_str_eq(task, CRMD_ACTION_START)) {
        rsc->role = RSC_ROLE_STARTED;
        clear_past_failure = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        rsc->role = RSC_ROLE_STOPPED;
        clear_past_failure = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
        rsc->role = RSC_ROLE_MASTER;
        clear_past_failure = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        /* Demote from Master does not clear an error */
        rsc->role = RSC_ROLE_SLAVE;

    } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
        rsc->role = RSC_ROLE_STARTED;
        clear_past_failure = TRUE;

    } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
        unpack_rsc_migration(rsc, node, xml_op, data_set);

    } else if (rsc->role < RSC_ROLE_STARTED) {
        /* Any other completed action implies the resource is active */
        pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
        set_active(rsc);
    }

    /* clear any previous failure actions */
    if (clear_past_failure) {
        switch (*on_fail) {
            case action_fail_stop:
            case action_fail_fence:
            case action_fail_migrate:
            case action_fail_standby:
                /* These recovery modes outlive a successful stop/start cycle */
                pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
                             rsc->id, fail2text(*on_fail));
                break;

            case action_fail_block:
            case action_fail_ignore:
            case action_fail_recover:
            /* NOTE(review): a case label (likely
             * action_fail_restart_container) appears to have been lost from
             * this listing -- confirm against upstream */
                *on_fail = action_fail_ignore;
                rsc->next_role = RSC_ROLE_UNKNOWN;
                break;
            /* NOTE(review): a case label (likely action_fail_reset_remote)
             * appears to have been lost here as well -- confirm against
             * upstream; without it the block below is unreachable */
                if (rsc->remote_reconnect_ms == 0) {
                    /* With no reconnect interval, the connection is allowed to
                     * start again after the remote node is fenced and
                     * completely stopped. (With a reconnect interval, we wait
                     * for the failure to be cleared entirely before attempting
                     * to reconnect.)
                     */
                    *on_fail = action_fail_ignore;
                    rsc->next_role = RSC_ROLE_UNKNOWN;
                }
                break;
        }
    }
}
3130 
3131 
/*!
 * \internal
 * \brief Unpack one operation history entry, updating resource and cluster state
 *
 * \param[in]     rsc           Resource the entry is for
 * \param[in]     node          Node the operation ran on
 * \param[in]     xml_op        Operation history XML entry to unpack
 * \param[in,out] last_failure  Most recent failure entry seen so far
 * \param[in,out] on_fail       Failure-handling policy in effect for the resource
 * \param[in]     data_set      Cluster working set
 *
 * \return TRUE (results are conveyed via side effects on rsc/data_set, not
 *         the return value; FALSE only on invalid arguments)
 */
gboolean
unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
              enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
    int task_id = 0;

    const char *key = NULL; /* NOTE(review): never assigned below, yet printed
                             * in the expired-failure debug message; task_key
                             * was probably intended -- confirm upstream */
    const char *task = NULL;
    const char *task_key = NULL;

    int rc = 0;
    int status = PCMK_LRM_OP_UNKNOWN;
    int target_rc = get_target_rc(xml_op);
    guint interval_ms = 0;

    gboolean expired = FALSE;
    resource_t *parent = rsc;
    enum action_fail_response failure_strategy = action_fail_recover;

    CRM_CHECK(rsc != NULL, return FALSE);
    CRM_CHECK(node != NULL, return FALSE);
    CRM_CHECK(xml_op != NULL, return FALSE);

    task_key = get_op_key(xml_op);

    task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);

    crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
    crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
    crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);

    CRM_CHECK(task != NULL, return FALSE);
    /* status must be a known PCMK_LRM_OP_* value */
    CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
    CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);

    if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
        /* NOTE(review): the second half of this condition (likely a
         * CRMD_ACTION_METADATA comparison) appears to have been lost from
         * this listing -- confirm against upstream */
        /* safe to ignore these */
        return TRUE;
    }

    if (is_not_set(rsc->flags, pe_rsc_unique)) {
        /* Anonymous clone instances share history via their parent */
        parent = uber_parent(rsc);
    }

    pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
                 task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));

    if (node->details->unclean) {
        pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
                     " Further action depends on the value of the stop's on-fail attribute",
                     node->details->uname, rsc->id);
    }

    if(status != PCMK_LRM_OP_NOT_INSTALLED) {
        expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
    }

    /* Degraded results are informational only, re-map them to their error-free equivalents */
    if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
        rc = PCMK_OCF_OK;

        /* Add them to the failed list to highlight them for the user */
        if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
            crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
            record_failed_op(xml_op, node, rsc, data_set);
        }

    } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
        /* NOTE(review): the remap assignment (likely
         * rc = PCMK_OCF_RUNNING_MASTER) appears to have been lost from this
         * listing -- confirm against upstream */

        /* Add them to the failed list to highlight them for the user */
        if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
            /* NOTE(review): a crm_trace() remap message appears to have been
             * lost here -- confirm against upstream */
            record_failed_op(xml_op, node, rsc, data_set);
        }
    }

    if (expired && target_rc != rc) {
        const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);

        pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
                     key, node->details->uname,
                     services_ocf_exitcode_str(rc), rc,
                     services_ocf_exitcode_str(target_rc), target_rc);

        if (interval_ms == 0) {
            /* A one-shot action's expired failure can simply be ignored */
            crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
                       task_key, rc, magic, node->details->uname);
            goto done;

        } else if(node->details->online && node->details->unclean == FALSE) {
            /* A recurring action must be re-initiated instead */
            crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
                       task_key, rc, magic, node->details->uname);
            /* This is SO horrible, but we don't have access to CancelXmlOp() yet */
            crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
            goto done;
        }
    }

    /* If the executor reported an operation status of anything but done or
     * error, consider that final. But for done or error, we know better whether
     * it should be treated as a failure or not, because we know the expected
     * result.
     */
    if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
        status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
    }

    pe_rsc_trace(rsc, "Handling status: %d", status);
    switch (status) {
        case PCMK_LRM_OP_CANCELLED:
            /* do nothing?? */
            pe_err("Don't know what to do for cancelled ops yet");
            break;

        case PCMK_LRM_OP_PENDING:
            if (safe_str_eq(task, CRMD_ACTION_START)) {
                /* NOTE(review): a statement (likely setting a pending-start
                 * flag on rsc->flags) appears to have been lost from this
                 * listing -- confirm against upstream */
                set_active(rsc);

            } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
                rsc->role = RSC_ROLE_MASTER;

            } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
                /* If a pending migrate_to action is out on a unclean node,
                 * we have to force the stop action on the target. */
                const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
                node_t *target = pe_find_node(data_set->nodes, migrate_target);
                if (target) {
                    stop_action(rsc, target, FALSE);
                }
            }

            if (rsc->pending_task == NULL) {
                if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) {
                    /* Pending probes are not printed, even if pending
                     * operations are requested. If someone ever requests that
                     * behavior, uncomment this and the corresponding part of
                     * native.c:native_pending_task().
                     */
                    /*rsc->pending_task = strdup("probe");*/
                    /*rsc->pending_node = node;*/
                } else {
                    rsc->pending_task = strdup(task);
                    rsc->pending_node = node;
                }
            }
            break;

        case PCMK_LRM_OP_DONE:
            pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
            update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
            break;

        /* NOTE(review): a case label (likely PCMK_LRM_OP_NOT_INSTALLED)
         * appears to have been lost from this listing -- confirm against
         * upstream; the block below handles a missing resource agent */
            failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
            if (failure_strategy == action_fail_ignore) {
                crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
                         "Resource agent doesn't exist",
                         task_key, status, rc, node->details->uname);
                /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
                *on_fail = action_fail_migrate;
            }
            resource_location(parent, node, -INFINITY, "hard-error", data_set);
            unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
            break;

        case PCMK_LRM_OP_ERROR:
        /* NOTE(review): case labels (likely PCMK_LRM_OP_ERROR_HARD,
         * PCMK_LRM_OP_ERROR_FATAL, and PCMK_LRM_OP_NOTSUPPORTED) appear to
         * have been lost around here -- confirm against upstream; the
         * status checks in the else-branch below assume they exist */
        case PCMK_LRM_OP_TIMEOUT:

            failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
            if ((failure_strategy == action_fail_ignore)
                || (failure_strategy == action_fail_restart_container
                    && safe_str_eq(task, CRMD_ACTION_STOP))) {

                crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
                         task_key, rc, node->details->uname);

                /* Record the expected (not actual) result so the failure is
                 * treated as if it had succeeded */
                update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
                crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
                /* NOTE(review): a statement (likely flagging the failure as
                 * ignored on rsc->flags) appears to have been lost here --
                 * confirm against upstream */

                record_failed_op(xml_op, node, rsc, data_set);

                if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
                    *on_fail = failure_strategy;
                }

            } else {
                unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);

                if(status == PCMK_LRM_OP_ERROR_HARD) {
                    do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
                               "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
                               parent->id, node->details->uname,
                               task, services_ocf_exitcode_str(rc), rc);

                    resource_location(parent, node, -INFINITY, "hard-error", data_set);

                } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
                    crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
                            parent->id, task, services_ocf_exitcode_str(rc), rc);

                    resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
                }
            }
            break;
    }

  done:
    pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
    return TRUE;
}
3351 
3352 gboolean
3353 add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
3354 {
3355  const char *cluster_name = NULL;
3356 
3357  g_hash_table_insert(node->details->attrs,
3358  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
3359 
3360  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
3361  strdup(node->details->id));
3362  if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
3363  data_set->dc_node = node;
3364  node->details->is_dc = TRUE;
3365  g_hash_table_insert(node->details->attrs,
3366  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
3367  } else {
3368  g_hash_table_insert(node->details->attrs,
3369  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
3370  }
3371 
3372  cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
3373  if (cluster_name) {
3374  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
3375  strdup(cluster_name));
3376  }
3377 
3378  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
3379  node->details->attrs, NULL, overwrite, data_set->now);
3380 
3381  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
3382  const char *site_name = pe_node_attribute_raw(node, "site-name");
3383 
3384  if (site_name) {
3385  g_hash_table_insert(node->details->attrs,
3386  strdup(CRM_ATTR_SITE_NAME),
3387  strdup(site_name));
3388 
3389  } else if (cluster_name) {
3390  /* Default to cluster-name if unset */
3391  g_hash_table_insert(node->details->attrs,
3392  strdup(CRM_ATTR_SITE_NAME),
3393  strdup(cluster_name));
3394  }
3395  }
3396  return TRUE;
3397 }
3398 
3399 static GListPtr
3400 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3401 {
3402  int counter = -1;
3403  int stop_index = -1;
3404  int start_index = -1;
3405 
3406  xmlNode *rsc_op = NULL;
3407 
3408  GListPtr gIter = NULL;
3409  GListPtr op_list = NULL;
3410  GListPtr sorted_op_list = NULL;
3411 
3412  /* extract operations */
3413  op_list = NULL;
3414  sorted_op_list = NULL;
3415 
3416  for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3417  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
3418  crm_xml_add(rsc_op, "resource", rsc);
3419  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
3420  op_list = g_list_prepend(op_list, rsc_op);
3421  }
3422  }
3423 
3424  if (op_list == NULL) {
3425  /* if there are no operations, there is nothing to do */
3426  return NULL;
3427  }
3428 
3429  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
3430 
3431  /* create active recurring operations as optional */
3432  if (active_filter == FALSE) {
3433  return sorted_op_list;
3434  }
3435 
3436  op_list = NULL;
3437 
3438  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
3439 
3440  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3441  xmlNode *rsc_op = (xmlNode *) gIter->data;
3442 
3443  counter++;
3444 
3445  if (start_index < stop_index) {
3446  crm_trace("Skipping %s: not active", ID(rsc_entry));
3447  break;
3448 
3449  } else if (counter < start_index) {
3450  crm_trace("Skipping %s: old", ID(rsc_op));
3451  continue;
3452  }
3453  op_list = g_list_append(op_list, rsc_op);
3454  }
3455 
3456  g_list_free(sorted_op_list);
3457  return op_list;
3458 }
3459 
3460 GListPtr
3461 find_operations(const char *rsc, const char *node, gboolean active_filter,
3462  pe_working_set_t * data_set)
3463 {
3464  GListPtr output = NULL;
3465  GListPtr intermediate = NULL;
3466 
3467  xmlNode *tmp = NULL;
3468  xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
3469 
3470  node_t *this_node = NULL;
3471 
3472  xmlNode *node_state = NULL;
3473 
3474  for (node_state = __xml_first_child(status); node_state != NULL;
3475  node_state = __xml_next_element(node_state)) {
3476 
3477  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
3478  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
3479 
3480  if (node != NULL && safe_str_neq(uname, node)) {
3481  continue;
3482  }
3483 
3484  this_node = pe_find_node(data_set->nodes, uname);
3485  if(this_node == NULL) {
3486  CRM_LOG_ASSERT(this_node != NULL);
3487  continue;
3488 
3489  } else if (pe__is_guest_or_remote_node(this_node)) {
3490  determine_remote_online_status(data_set, this_node);
3491 
3492  } else {
3493  determine_online_status(node_state, this_node, data_set);
3494  }
3495 
3496  if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
3497  /* offline nodes run no resources...
3498  * unless stonith is enabled in which case we need to
3499  * make sure rsc start events happen after the stonith
3500  */
3501  xmlNode *lrm_rsc = NULL;
3502 
3503  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
3504  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
3505 
3506  for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL;
3507  lrm_rsc = __xml_next_element(lrm_rsc)) {
3508  if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
3509 
3510  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
3511 
3512  if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
3513  continue;
3514  }
3515 
3516  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
3517  output = g_list_concat(output, intermediate);
3518  }
3519  }
3520  }
3521  }
3522  }
3523 
3524  return output;
3525 }
GHashTable * tags
Definition: pe_types.h:156
gboolean unpack_rsc_op(resource_t *rsc, node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed, pe_working_set_t *data_set)
Definition: unpack.c:3133
bool remote_id_conflict(const char *remote_name, pe_working_set_t *data)
Definition: unpack.c:394
Services API.
#define LOG_TRACE
Definition: logging.h:26
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:156
char uname[MAX_NAME]
Definition: internal.h:87
GListPtr nodes
Definition: pe_types.h:133
#define XML_RSC_OP_LAST_CHANGE
Definition: msg_xml.h:280
void verify_pe_options(GHashTable *options)
Definition: common.c:175
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:1678
enum pe_quorum_policy no_quorum_policy
Definition: pe_types.h:125
#define RSC_STOP
Definition: crm.h:177
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:242
GHashTable * known_on
Definition: pe_types.h:330
#define CRMD_ACTION_MIGRATED
Definition: crm.h:147
xmlNode * failed
Definition: pe_types.h:141
#define pe_flag_stop_action_orphans
Definition: pe_types.h:97
GHashTable * attrs
Definition: pe_types.h:204
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:17
gboolean unseen
Definition: pe_types.h:188
gboolean fixed
Definition: pe_types.h:211
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:157
#define INFINITY
Definition: crm.h:73
gint sort_rsc_priority(gconstpointer a, gconstpointer b)
Definition: utils.c:414
#define CRM_ATTR_KIND
Definition: crm.h:90
gboolean get_target_role(resource_t *rsc, enum rsc_role_e *role)
Definition: utils.c:1740
GListPtr dangling_migrations
Definition: pe_types.h:341
#define XML_NODE_IS_FENCED
Definition: msg_xml.h:243
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:356
GHashTable * state
Definition: pe_types.h:416
node_t * node_copy(const node_t *this_node)
Definition: utils.c:118
#define CRM_ATTR_IS_DC
Definition: crm.h:92
#define stop_action(rsc, node, optional)
Definition: internal.h:210
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set)
Definition: remote.c:215
pe_resource_t * container
Definition: pe_types.h:343
pe_node_t * partial_migration_source
Definition: pe_types.h:328
#define XML_ATTR_QUORUM_PANIC
Definition: msg_xml.h:84
#define pe_flag_concurrent_fencing
Definition: pe_types.h:94
node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t *data_set)
Definition: unpack.c:333
#define XML_ATTR_TYPE
Definition: msg_xml.h:99
bool pe_can_fence(pe_working_set_t *data_set, node_t *node)
Definition: utils.c:91
enum rsc_role_e role
Definition: pe_types.h:333
#define XML_TAG_UTILIZATION
Definition: msg_xml.h:171
#define XML_RULE_ATTR_SCORE
Definition: msg_xml.h:296
#define XML_BOOLEAN_FALSE
Definition: msg_xml.h:108
#define crm_config_err(fmt...)
Definition: crm_internal.h:179
#define pe_flag_symmetric_cluster
Definition: pe_types.h:88
gboolean standby
Definition: pe_types.h:415
xmlNode * pe_create_remote_xml(xmlNode *parent, const char *uname, const char *container_id, const char *migrateable, const char *is_managed, const char *start_timeout, const char *server, const char *port)
Definition: remote.c:151
#define pe_flag_remove_after_stop
Definition: pe_types.h:101
enum rsc_role_e next_role
Definition: pe_types.h:334
action_t * pe_fence_op(node_t *node, const char *op, bool optional, const char *reason, pe_working_set_t *data_set)
Definition: utils.c:2193
#define pe_flag_maintenance_mode
Definition: pe_types.h:89
enum action_fail_response on_fail
Definition: pe_types.h:376
int char2score(const char *score)
Definition: utils.c:199
#define pe_proc_warn(fmt...)
Definition: internal.h:23
pe_resource_t * remote_rsc
Definition: pe_types.h:200
#define XML_TAG_TRANSIENT_NODEATTRS
Definition: msg_xml.h:361
#define CRMD_ACTION_NOTIFY
Definition: crm.h:160
long long crm_get_msec(const char *input)
Definition: utils.c:567
gboolean unpack_resources(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:721
GHashTable * meta
Definition: pe_types.h:336
#define pe_rsc_unique
Definition: pe_types.h:223
gboolean common_unpack(xmlNode *xml_obj, resource_t **rsc, resource_t *parent, pe_working_set_t *data_set)
Definition: complex.c:360
resource_object_functions_t * fns
Definition: pe_types.h:295
#define XML_CIB_TAG_TAG
Definition: msg_xml.h:388
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:227
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:275
#define CRMD_ACTION_PROMOTE
Definition: crm.h:155
gboolean pe__is_guest_or_remote_node(pe_node_t *node)
Definition: remote.c:58
int crm_parse_int(const char *text, const char *default_text)
Parse an integer value from a string.
Definition: strings.c:110
gboolean unpack_tags(xmlNode *xml_tags, pe_working_set_t *data_set)
Definition: unpack.c:781
GListPtr fillers
Definition: pe_types.h:344
gboolean pending
Definition: pe_types.h:186
GListPtr resources
Definition: pe_types.h:134
#define XML_NVPAIR_ATTR_NAME
Definition: msg_xml.h:339
#define XML_NODE_IS_MAINTENANCE
Definition: msg_xml.h:244
char * id
Definition: pe_types.h:420
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1609
#define XML_NODE_EXPECTED
Definition: msg_xml.h:239
#define XML_CIB_TAG_RSC_TEMPLATE
Definition: msg_xml.h:180
AIS_Host host
Definition: internal.h:86
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1725
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:142
pe_node_t * pe_find_node(GListPtr node_list, const char *uname)
Definition: status.c:412
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:181
xmlNode * params_restart
Definition: internal.h:312
gboolean determine_online_status(xmlNode *node_state, node_t *this_node, pe_working_set_t *data_set)
Definition: unpack.c:1392
gboolean unpack_remote_nodes(xmlNode *xml_resources, pe_working_set_t *data_set)
Definition: unpack.c:587
#define clear_bit(word, bit)
Definition: crm_internal.h:168
void copy_in_properties(xmlNode *target, xmlNode *src)
Definition: xml.c:1750
#define CRMD_JOINSTATE_NACK
Definition: crm.h:140
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:225
#define CRM_ATTR_CLUSTER_NAME
Definition: crm.h:93
pe_node_t * partial_migration_target
Definition: pe_types.h:327
int get_target_rc(xmlNode *xml_op)
Definition: unpack.c:3016
GHashTable * tickets
Definition: pe_types.h:128
gboolean remote_was_fenced
Definition: pe_types.h:195
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
Definition: nvpair.c:459
#define pe_flag_have_quorum
Definition: pe_types.h:87
gboolean unpack_config(xmlNode *config, pe_working_set_t *data_set)
Definition: unpack.c:169
char * pending_task
Definition: pe_types.h:309
bool pe__bundle_needs_remote_name(pe_resource_t *rsc)
Definition: bundle.c:955
xmlNode * get_xpath_object(const char *xpath, xmlNode *xml_obj, int error_level)
Definition: xpath.c:220
#define pe_proc_err(fmt...)
Definition: internal.h:22
gboolean remote_requires_reset
Definition: pe_types.h:194
action_fail_response
Definition: common.h:36
char * strndup(const char *str, size_t len)
char * dc_uuid
Definition: pe_types.h:117
#define XML_CIB_TAG_PROPSET
Definition: msg_xml.h:162
gboolean decode_transition_key(const char *key, char **uuid, int *action, int *transition_id, int *target_rc)
Parse a transition key into its constituent parts.
Definition: operations.c:222
#define CRM_TRACE_INIT_DATA(name)
Definition: logging.h:111
#define pe_flag_stop_everything
Definition: pe_types.h:98
#define XML_LRM_ATTR_RSCID
Definition: msg_xml.h:269
gboolean remote_maintenance
Definition: pe_types.h:196
#define CRMD_ACTION_START
Definition: crm.h:149
uint32_t id
Definition: internal.h:82
gboolean is_dc
Definition: pe_types.h:191
#define XML_LRM_ATTR_TASK_KEY
Definition: msg_xml.h:261
#define XML_TAG_ATTR_SETS
Definition: msg_xml.h:163
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:260
const char * role2text(enum rsc_role_e role)
Definition: common.c:329
GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t *data_set)
Definition: unpack.c:3461
#define CRMD_ACTION_STOP
Definition: crm.h:152
pe_resource_t * pe__find_bundle_replica(const pe_resource_t *bundle, const pe_node_t *node)
Definition: bundle.c:1404
#define STATUS_PATH_MAX
Definition: unpack.c:2353
int weight
Definition: pe_types.h:210
#define pe_flag_have_remote_nodes
Definition: pe_types.h:106
#define CRMD_JOINSTATE_DOWN
Definition: crm.h:137
#define crm_warn(fmt, args...)
Definition: logging.h:241
guint remote_reconnect_ms
Definition: pe_types.h:308
#define CRMD_ACTION_DEMOTE
Definition: crm.h:157
#define set_bit(word, bit)
Definition: crm_internal.h:167
#define crm_atoi(text, default_text)
Definition: util.h:96
#define pe_rsc_allow_migrate
Definition: pe_types.h:240
#define pe_rsc_orphan_container_filler
Definition: pe_types.h:220
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:484
#define pe_rsc_failed
Definition: pe_types.h:234
gboolean pe__is_guest_node(pe_node_t *node)
Definition: remote.c:47
#define crm_debug(fmt, args...)
Definition: logging.h:245
void native_add_running(resource_t *rsc, node_t *node, pe_working_set_t *data_set)
Definition: native.c:37
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:746
#define XML_CIB_ATTR_SHUTDOWN
Definition: msg_xml.h:246
#define XML_RSC_ATTR_CONTAINER
Definition: msg_xml.h:205
Utility functions.
#define XML_ATTR_ID
Definition: msg_xml.h:96
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:423
#define XML_CIB_TAG_RESOURCE
Definition: msg_xml.h:174
#define pe_rsc_is_container
Definition: pe_types.h:244
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:107
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:158
gboolean unpacked
Definition: pe_types.h:197
char * digest_all_calc
Definition: internal.h:313
int failure_timeout
Definition: pe_types.h:306
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1512
pe_resource_t *(* find_rsc)(pe_resource_t *parent, const char *search, const pe_node_t *node, int flags)
Definition: pe_types.h:44
match only clone instances
Definition: pe_types.h:81
#define pe_rsc_start_pending
Definition: pe_types.h:236
#define CRM_ATTR_UNAME
Definition: crm.h:88
#define XML_NODE_IS_PEER
Definition: msg_xml.h:241
GListPtr refs
Definition: pe_types.h:421
#define crm_trace(fmt, args...)
Definition: logging.h:246
#define CRMD_JOINSTATE_MEMBER
Definition: crm.h:139
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:121
enum rsc_digest_cmp_val rc
Definition: internal.h:309
char * digest_secure_calc
Definition: internal.h:314
void calculate_active_ops(GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2117
xmlNode * add_node_copy(xmlNode *new_parent, xmlNode *xml_node)
Definition: xml.c:1868
#define pe_flag_startup_fencing
Definition: pe_types.h:102
const char * stonith_action
Definition: pe_types.h:119
struct pe_node_shared_s * details
Definition: pe_types.h:213
GListPtr running_on
Definition: pe_types.h:329
#define crm_log_xml_debug(xml, text)
Definition: logging.h:253
#define pe_rsc_needs_fencing
Definition: pe_types.h:247
unsigned long long flags
Definition: pe_types.h:311
const char * uname
Definition: pe_types.h:179
#define pe_rsc_promotable
Definition: pe_types.h:225
void pe_fence_node(pe_working_set_t *data_set, node_t *node, const char *reason)
Schedule a fence action for a node.
Definition: unpack.c:70
#define XML_TAG_META_SETS
Definition: msg_xml.h:164
Wrappers for and extensions to libxml2.
GHashTable * config_hash
Definition: pe_types.h:127
#define XML_ATTR_UNAME
Definition: msg_xml.h:118
char * clone_name
Definition: pe_types.h:285
gboolean add_tag_ref(GHashTable *tags, const char *tag_name, const char *obj_ref)
Definition: utils.c:2309
#define XML_RSC_ATTR_MANAGED
Definition: msg_xml.h:195
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:1890
action_t * custom_action(resource_t *rsc, char *key, const char *task, node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:441
#define pe_flag_stonith_enabled
Definition: pe_types.h:91
gboolean unpack_status(xmlNode *status, pe_working_set_t *data_set)
Definition: unpack.c:1063
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:462
time_t last_granted
Definition: pe_types.h:414
gboolean unpack_lrm_resources(node_t *node, xmlNode *lrm_rsc_list, pe_working_set_t *data_set)
Definition: unpack.c:2300
pe_resource_t * pe__create_clone_child(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: clone.c:58
#define XML_LRM_ATTR_MIGRATE_TARGET
Definition: msg_xml.h:286
#define CIB_OPTIONS_FIRST
Definition: msg_xml.h:49
gboolean standby
Definition: pe_types.h:184
#define XML_RSC_ATTR_REMOTE_NODE
Definition: msg_xml.h:208
char * uuid
Definition: pe_types.h:370
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:276
gboolean expected_up
Definition: pe_types.h:190
void free_xml(xmlNode *child)
Definition: xml.c:2014
enum pe_obj_types variant
Definition: pe_types.h:293
xmlNode * input
Definition: pe_types.h:113
gboolean granted
Definition: pe_types.h:413
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:220
#define XML_CIB_TAG_NODE
Definition: msg_xml.h:159
const char * placement_strategy
Definition: pe_types.h:120
gboolean add_node_attrs(xmlNode *xml_obj, node_t *node, gboolean overwrite, pe_working_set_t *data_set)
Definition: unpack.c:3353
xmlNode * params_all
Definition: internal.h:310
pe_resource_t * pe_find_resource(GListPtr rsc_list, const char *id_rh)
Definition: status.c:360
const char * id
Definition: pe_types.h:178
char * id
Definition: pe_types.h:412
#define crm_config_warn(fmt...)
Definition: crm_internal.h:180
#define XML_ATTR_TRANSITION_KEY
Definition: msg_xml.h:357
gboolean rsc_discovery_enabled
Definition: pe_types.h:193
#define CRM_XS
Definition: logging.h:34
GListPtr running_rsc
Definition: pe_types.h:201
pe_node_t * dc_node
Definition: pe_types.h:118
GHashTable * node_hash_from_list(GListPtr list)
Definition: utils.c:174
const char * localhost
Definition: pe_types.h:155
guint crm_parse_ms(const char *text)
Definition: strings.c:143
#define pe_flag_quick_location
Definition: pe_types.h:108
gboolean xml_contains_remote_node(xmlNode *xml)
Definition: remote.c:92
gboolean is_remote_node
Definition: pe_types.h:314
pe_node_t * pending_node
Definition: pe_types.h:346
const char * fail2text(enum action_fail_response fail)
Definition: common.c:187
GListPtr children
Definition: pe_types.h:340
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:226
#define crm_err(fmt, args...)
Definition: logging.h:240
#define XML_CIB_TAG_TICKET_STATE
Definition: msg_xml.h:385
#define set_config_flag(data_set, option, flag)
Definition: unpack.c:27
void resource_location(resource_t *rsc, node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1565
#define CRM_ASSERT(expr)
Definition: results.h:42
xmlXPathObjectPtr xpath_search(xmlNode *xml_top, const char *path)
Definition: xpath.c:145
pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:360
ticket_t * ticket_new(const char *ticket_id, pe_working_set_t *data_set)
Definition: utils.c:1857
char * clone_zero(const char *last_rsc_id)
Definition: unpack.c:1534
#define XML_ATTR_HAVE_WATCHDOG
Definition: msg_xml.h:86
#define XML_NODE_ATTR_RSC_DISCOVERY
Definition: msg_xml.h:342
gboolean unpack_nodes(xmlNode *xml_nodes, pe_working_set_t *data_set)
Definition: unpack.c:501
#define CRMD_ACTION_METADATA
Definition: crm.h:164
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:258
xmlNode * params_secure
Definition: internal.h:311
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:272
#define CRMD_ACTION_MIGRATE
Definition: crm.h:146
#define XML_NVPAIR_ATTR_VALUE
Definition: msg_xml.h:340
int node_score_red
Definition: utils.c:63
#define crm_str_hash
Definition: util.h:60
void(* free)(pe_resource_t *)
Definition: pe_types.h:53
GHashTable * utilization
Definition: pe_types.h:205
enum rsc_role_e fail_role
Definition: pe_types.h:377
gboolean shutdown
Definition: pe_types.h:189
char data[0]
Definition: internal.h:92
#define crm_str(x)
Definition: logging.h:266
#define XML_LRM_ATTR_OPSTATUS
Definition: msg_xml.h:270
int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:251
#define CRMD_JOINSTATE_PENDING
Definition: crm.h:138
rsc_role_e
Definition: common.h:86
#define pe_rsc_block
Definition: pe_types.h:219
enum pe_action_flags flags
Definition: pe_types.h:374
gboolean maintenance
Definition: pe_types.h:192
#define XML_LRM_ATTR_RC
Definition: msg_xml.h:271
GHashTable * digest_cache
cache of calculated resource digests
Definition: pe_types.h:206
bool pe__is_universal_clone(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: clone.c:631
#define pe_rsc_failure_ignored
Definition: pe_types.h:242
pe_node_t *(* location)(const pe_resource_t *, GList **, int)
Definition: pe_types.h:52
#define XML_NODE_JOIN_STATE
Definition: msg_xml.h:238
void pe_free_action(action_t *action)
Definition: utils.c:1313
void destroy_ticket(gpointer data)
Definition: utils.c:1845
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:139
#define XML_CIB_TAG_OBJ_REF
Definition: msg_xml.h:389
void unpack_instance_attributes(xmlNode *top, xmlNode *xml_obj, const char *set_name, GHashTable *node_hash, GHashTable *hash, const char *always_first, gboolean overwrite, crm_time_t *now)
Definition: rules.c:904
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:240
gboolean pe__is_remote_node(pe_node_t *node)
Definition: remote.c:36
gboolean crm_is_true(const char *s)
Definition: strings.c:172
#define pe_flag_have_stonith_resource
Definition: pe_types.h:92
#define CRM_ATTR_SITE_NAME
Definition: crm.h:94
#define XML_CIB_TAG_GROUP
Definition: msg_xml.h:175
#define pe_flag_enable_unfencing
Definition: pe_types.h:93
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:228
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:18
uint32_t pe_wo
Definition: unpack.c:43
#define ID(x)
Definition: msg_xml.h:414
unsigned long long flags
Definition: pe_types.h:122
#define pe_err(fmt...)
Definition: internal.h:20
void print_resource(int log_level, const char *pre_text, resource_t *rsc, gboolean details)
Definition: utils.c:1297
#define safe_str_eq(a, b)
Definition: util.h:59
int node_score_green
Definition: utils.c:64
#define ONLINESTATUS
Definition: util.h:36
gboolean order_actions(action_t *lh_action, action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1776
op_digest_cache_t * rsc_action_digest_cmp(resource_t *rsc, xmlNode *xml_op, node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2033
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: utils.c:1517
gboolean standby_onfail
Definition: pe_types.h:185
#define XML_LRM_ATTR_MIGRATE_SOURCE
Definition: msg_xml.h:285
void freeXpathObject(xmlXPathObjectPtr xpathObj)
Definition: xpath.c:45
#define CRM_ATTR_ID
Definition: crm.h:89
const char * pe_base_name_end(const char *id)
Definition: unpack.c:1472
gint sort_node_uname(gconstpointer a, gconstpointer b)
Definition: utils.c:217
gboolean unclean
Definition: pe_types.h:187
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
#define XPATH_ENABLE_UNFENCING
Definition: unpack.c:147
GList * GListPtr
Definition: crm.h:192
#define pe_flag_start_failure_fatal
Definition: pe_types.h:100
enum node_type type
Definition: pe_types.h:180
int node_score_yellow
Definition: utils.c:65
#define XML_CIB_TAG_TICKETS
Definition: msg_xml.h:384
crm_time_t * now
Definition: pe_types.h:114
#define crm_info(fmt, args...)
Definition: logging.h:243
char * digest_restart_calc
Definition: internal.h:315
#define pe_rsc_managed
Definition: pe_types.h:218
#define pe_rsc_orphan
Definition: pe_types.h:217
char * generate_op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key.
Definition: operations.c:39
GHashTable * template_rsc_sets
Definition: pe_types.h:154
pe_node_t * pe_find_node_any(GListPtr node_list, const char *id, const char *uname)
Definition: status.c:384
gboolean online
Definition: pe_types.h:183
GList * stop_needed
Definition: pe_types.h:162
pe_resource_t * parent
Definition: pe_types.h:291
enum crm_ais_msg_types type
Definition: internal.h:85
#define pe_warn_once(pe_wo_bit, fmt...)
Definition: unpack.h:97
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:16
char * id
Definition: pe_types.h:284
GHashTable * allowed_nodes
Definition: pe_types.h:331
#define pe_flag_startup_probes
Definition: pe_types.h:104
#define CRMD_ACTION_STATUS
Definition: crm.h:163
#define pe_flag_stop_rsc_orphans
Definition: pe_types.h:96