pacemaker  2.0.5-ba59be712
Scalable High-Availability cluster resource manager
pcmk_sched_allocate.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2020 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <sys/param.h>
13 
14 #include <crm/crm.h>
15 #include <crm/cib.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
19 
20 #include <glib.h>
21 
22 #include <crm/pengine/status.h>
23 #include <pacemaker-internal.h>
24 
25 CRM_TRACE_INIT_DATA(pacemaker);
26 
27 void set_alloc_actions(pe_working_set_t * data_set);
28 extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set);
29 extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set);
30 static void apply_remote_node_ordering(pe_working_set_t *data_set);
31 static enum remote_connection_state get_remote_node_state(pe_node_t *node);
32 
39 };
40 
41 static const char *
42 state2text(enum remote_connection_state state)
43 {
44  switch (state) {
46  return "unknown";
47  case remote_state_alive:
48  return "alive";
50  return "resting";
52  return "failed";
54  return "stopped";
55  }
56 
57  return "impossible";
58 }
59 
61  {
74  },
75  {
88  },
89  {
100  clone_expand,
102  },
103  {
116  }
117 };
118 
119 gboolean
120 update_action_flags(pe_action_t * action, enum pe_action_flags flags, const char *source, int line)
121 {
122  static unsigned long calls = 0;
123  gboolean changed = FALSE;
124  gboolean clear = pcmk_is_set(flags, pe_action_clear);
125  enum pe_action_flags last = action->flags;
126 
127  if (clear) {
128  pe__clear_action_flags_as(source, line, action, flags);
129  } else {
130  pe__set_action_flags_as(source, line, action, flags);
131  }
132 
133  if (last != action->flags) {
134  calls++;
135  changed = TRUE;
136  /* Useful for tracking down _who_ changed a specific flag */
137  /* CRM_ASSERT(calls != 534); */
139  crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
140  action->uuid, action->node ? action->node->details->uname : "[none]",
141  clear ? "un-" : "", flags, last, action->flags, calls, source);
142  }
143 
144  return changed;
145 }
146 
/*!
 * \internal
 * \brief Compare a resource's current definition against its recorded history
 *
 * Compares selected resource XML attributes against the values stored in the
 * node's resource history entry. If any differ, unfencing is triggered, and a
 * restart is forced when the resource is active on the node.
 *
 * \param[in] rsc          Resource to check
 * \param[in] node         Node whose history entry is being examined
 * \param[in] rsc_entry    Resource history XML from the node's LRM status
 * \param[in] active_here  TRUE if the resource is active on \p node
 * \param[in] data_set     Cluster working set
 *
 * \return TRUE if the stale history entry should be deleted, otherwise FALSE
 */
static gboolean
check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry,
                     gboolean active_here, pe_working_set_t * data_set)
{
    int attr_lpc = 0;
    gboolean force_restart = FALSE;
    gboolean delete_resource = FALSE;
    gboolean changed = FALSE;

    const char *value = NULL;
    const char *old_value = NULL;

    const char *attr_list[] = {
        /* NOTE(review): the list entries (original lines 160-162) were lost
         * when this copy was extracted — presumably the resource type, class,
         * and provider attribute names; confirm against upstream. */
    };

    /* Compare each attribute of interest between configuration and history */
    for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
        value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
        old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
        if (value == old_value /* i.e. NULL */
            || pcmk__str_eq(value, old_value, pcmk__str_none)) {
            continue; // attribute unchanged
        }

        changed = TRUE;
        trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
        if (active_here) {
            force_restart = TRUE;
            crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
                       rsc->id, node->details->uname, attr_list[attr_lpc],
                       crm_str(old_value), crm_str(value));
        }
    }
    if (force_restart) {
        /* make sure the restart happens */
        stop_action(rsc, node, FALSE);
        /* NOTE(review): original line 185 is missing here (extraction gap) */
        delete_resource = TRUE;

    } else if (changed) {
        delete_resource = TRUE;
    }
    return delete_resource;
}
193 
194 static void
195 CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node,
196  const char *reason, pe_working_set_t * data_set)
197 {
198  guint interval_ms = 0;
199  pe_action_t *cancel = NULL;
200 
201  const char *task = NULL;
202  const char *call_id = NULL;
203 
204  CRM_CHECK(xml_op != NULL, return);
205  CRM_CHECK(active_node != NULL, return);
206 
207  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
208  call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
209  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
210 
211  crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s",
212  rsc->id, task, interval_ms,
213  active_node->details->uname, (reason? reason : "unknown"));
214 
215  cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set);
216  add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
217  custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
218 }
219 
/*!
 * \internal
 * \brief Check whether an action's recorded definition still matches the
 *        current configuration
 *
 * Depending on the digest comparison, schedules cancellation of orphaned
 * recurring operations, a forced restart, or a reload.
 *
 * \param[in] rsc          Resource the history entry belongs to
 * \param[in] active_node  Node the action was performed on
 * \param[in] xml_op       Resource history entry (lrm_rsc_op) to check
 * \param[in] data_set     Cluster working set
 *
 * \return TRUE if the action definition changed, otherwise FALSE
 *
 * NOTE(review): several source lines were lost when this copy was extracted
 * (marked inline below); the function does not compile as-is.
 */
static gboolean
check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op,
                        pe_working_set_t * data_set)
{
    char *key = NULL;
    guint interval_ms = 0;
    const op_digest_cache_t *digest_data = NULL;
    gboolean did_change = FALSE;

    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *digest_secure = NULL;

    CRM_CHECK(active_node != NULL, return FALSE);

    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
    if (interval_ms > 0) {
        xmlNode *op_match = NULL;

        /* we need to reconstruct the key because of the way we used to construct resource IDs */
        key = pcmk__op_key(rsc->id, task, interval_ms);

        pe_rsc_trace(rsc, "Checking parameters for %s", key);
        op_match = find_rsc_op_entry(rsc, key);

        if ((op_match == NULL)
            /* NOTE(review): condition continuation and opening brace
             * (original line 245) missing from this copy */
            CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
            free(key);
            return TRUE;

        } else if (op_match == NULL) {
            /* Recurring operation is no longer configured */
            pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
            free(key);
            return TRUE;
        }
        free(key);
        key = NULL;
    }

    crm_trace("Testing " PCMK__OP_FMT " on %s",
              rsc->id, task, interval_ms, active_node->details->uname);
    if ((interval_ms == 0) && pcmk__str_eq(task, RSC_STATUS, pcmk__str_casei)) {
        /* Reload based on the start action not a probe */
        task = RSC_START;

    } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_MIGRATED, pcmk__str_casei)) {
        /* Reload based on the start action not a migrate */
        task = RSC_START;
    } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_PROMOTE, pcmk__str_casei)) {
        /* Reload based on the start action not a promote */
        task = RSC_START;
    }

    digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);

    if (pcmk_is_set(data_set->flags, pe_flag_sanitized)) {
        digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
    }

    /* When only private (sanitized) parameters changed, just report it */
    if(digest_data->rc != RSC_DIGEST_MATCH
       && digest_secure
       && digest_data->digest_secure_calc
       && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
        if (pcmk_is_set(data_set->flags, pe_flag_stdout)) {
            printf("Only 'private' parameters to " PCMK__OP_FMT
                   " on %s changed: %s\n",
                   rsc->id, task, interval_ms, active_node->details->uname,
                   /* NOTE(review): final argument and closing paren
                    * (original line 287) missing from this copy */
        }

    } else if (digest_data->rc == RSC_DIGEST_RESTART) {
        /* Changes that force a restart */
        pe_action_t *required = NULL;

        did_change = TRUE;
        key = pcmk__op_key(rsc->id, task, interval_ms);
        crm_log_xml_info(digest_data->params_restart, "params:restart");
        required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
        pe_action_set_flag_reason(__func__, __LINE__, required, NULL,
                                  "resource definition change", pe_action_optional, TRUE);

        trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);

    } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
        /* Changes that can potentially be handled by a reload */
        const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);

        did_change = TRUE;
        trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
        crm_log_xml_info(digest_data->params_all, "params:reload");
        key = pcmk__op_key(rsc->id, task, interval_ms);

        if (interval_ms > 0) {
            pe_action_t *op = NULL;

#if 0
            /* Always reload/restart the entire resource */
            ReloadRsc(rsc, active_node, data_set);
#else
            /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
            op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
            /* NOTE(review): original line 321 missing here (extraction gap) */
#endif

        } else if (digest_restart) {
            pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);

            /* Reload this resource */
            ReloadRsc(rsc, active_node, data_set);
            free(key);

        } else {
            pe_action_t *required = NULL;
            pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);

            /* Re-send the start/demote/promote op
             * Recurring ops will be detected independently
             */
            required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
            pe_action_set_flag_reason(__func__, __LINE__, required, NULL,
                                      "resource definition change", pe_action_optional, TRUE);
        }
    }

    return did_change;
}
346 
/*!
 * \internal
 * \brief Run a deferred parameter check for a resource history entry, and
 *        clear the resource's fail count if parameters changed
 *
 * \param[in] rsc       Resource the history entry belongs to
 * \param[in] node      Node the entry was recorded on
 * \param[in] rsc_op    History entry (lrm_rsc_op) to check
 * \param[in] check     Which kind of check to run
 * \param[in] data_set  Cluster working set
 */
static void
check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
             enum pe_check_parameters check, pe_working_set_t *data_set)
{
    const char *reason = NULL;
    op_digest_cache_t *digest_data = NULL;

    switch (check) {
        case pe_check_active:
            if (check_action_definition(rsc, node, rsc_op, data_set)
                && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                    data_set)) {

                reason = "action definition changed";
            }
            break;

        /* NOTE(review): the case label for this branch (original line 370)
         * is missing from this copy — presumably the last-failure digest
         * check; confirm against upstream. */
            digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
            switch (digest_data->rc) {
                case RSC_DIGEST_UNKNOWN:
                    crm_trace("Resource %s history entry %s on %s has no digest to compare",
                              rsc->id, ID(rsc_op), node->details->id);
                    break;
                case RSC_DIGEST_MATCH:
                    break;
                default:
                    reason = "resource parameters have changed";
                    break;
            }
            break;
    }

    if (reason) {
        pe__clear_failcount(rsc, node, reason, data_set);
    }
}
390 
/*!
 * \internal
 * \brief Process one node's history for one resource, cancelling or refreshing
 *        actions whose definitions have changed
 *
 * Deletes orphaned resources, cancels recurring operations in maintenance
 * mode, and schedules parameter checks (possibly deferred) for relevant
 * operations.
 *
 * \param[in] rsc_entry  Resource history XML (lrm_resource) from the node
 * \param[in] rsc        Resource the history belongs to
 * \param[in] node       Node whose history is being processed
 * \param[in] data_set   Cluster working set
 */
static void
check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    int offset = -1;
    int stop_index = 0;
    int start_index = 0;

    const char *task = NULL;

    xmlNode *rsc_op = NULL;
    GListPtr op_list = NULL;
    GListPtr sorted_op_list = NULL;

    CRM_CHECK(node != NULL, return);

    if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
        pe_resource_t *parent = uber_parent(rsc);
        /* Only delete history for non-clone orphans (anonymous clone
         * instances keep their history) */
        if(parent == NULL
           || pe_rsc_is_clone(parent) == FALSE
           || pcmk_is_set(parent->flags, pe_rsc_unique)) {
            pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
            DeleteRsc(rsc, node, FALSE, data_set);
        } else {
            pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
        }
        return;

    } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
        if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
            DeleteRsc(rsc, node, FALSE, data_set);
        }
        pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
                     rsc->id, node->details->uname);
        return;
    }

    pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);

    if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
        DeleteRsc(rsc, node, FALSE, data_set);
    }

    /* Collect all operation history entries for this resource */
    for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL;
         rsc_op = pcmk__xe_next(rsc_op)) {

        if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) {
            op_list = g_list_prepend(op_list, rsc_op);
        }
    }

    sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
    calculate_active_ops(sorted_op_list, &start_index, &stop_index);

    for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
        xmlNode *rsc_op = (xmlNode *) gIter->data;
        guint interval_ms = 0;

        offset++;

        if (start_index < stop_index) {
            /* stopped */
            continue;
        } else if (offset < start_index) {
            /* action occurred prior to a start */
            continue;
        }

        task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
        crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);

        if ((interval_ms > 0) &&
            /* NOTE(review): condition continuation and opening brace
             * (original line 463) missing from this copy — presumably a
             * maintenance-mode check; confirm against upstream. */
            // Maintenance mode cancels recurring operations
            CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);

        } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START,
                                                             RSC_PROMOTE, RSC_MIGRATED, NULL)) {
            /* If a resource operation failed, and the operation's definition
             * has changed, clear any fail count so they can be retried fresh.
             */

            if (pe__bundle_needs_remote_name(rsc)) {
                /* We haven't allocated resources to nodes yet, so if the
                 * REMOTE_CONTAINER_HACK is used, we may calculate the digest
                 * based on the literal "#uname" value rather than the properly
                 * substituted value. That would mistakenly make the action
                 * definition appear to have been changed. Defer the check until
                 * later in this case.
                 */
                pe__add_param_check(rsc_op, rsc, node, pe_check_active,
                                    data_set);

            } else if (check_action_definition(rsc, node, rsc_op, data_set)
                       && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
                                           data_set)) {
                pe__clear_failcount(rsc, node, "action definition changed",
                                    data_set);
            }
        }
    }
    g_list_free(sorted_op_list);
}
494 
495 static GListPtr
496 find_rsc_list(GListPtr result, pe_resource_t * rsc, const char *id, gboolean renamed_clones,
497  gboolean partial, pe_working_set_t * data_set)
498 {
499  GListPtr gIter = NULL;
500  gboolean match = FALSE;
501 
502  if (id == NULL) {
503  return NULL;
504  }
505 
506  if (rsc == NULL) {
507  if (data_set == NULL) {
508  return NULL;
509  }
510  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
511  pe_resource_t *child = (pe_resource_t *) gIter->data;
512 
513  result = find_rsc_list(result, child, id, renamed_clones, partial,
514  NULL);
515  }
516  return result;
517  }
518 
519  if (partial) {
520  if (strstr(rsc->id, id)) {
521  match = TRUE;
522 
523  } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
524  match = TRUE;
525  }
526 
527  } else {
528  if (strcmp(rsc->id, id) == 0) {
529  match = TRUE;
530 
531  } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
532  match = TRUE;
533  }
534  }
535 
536  if (match) {
537  result = g_list_prepend(result, rsc);
538  }
539 
540  if (rsc->children) {
541  gIter = rsc->children;
542  for (; gIter != NULL; gIter = gIter->next) {
543  pe_resource_t *child = (pe_resource_t *) gIter->data;
544 
545  result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
546  }
547  }
548 
549  return result;
550 }
551 
552 static void
553 check_actions(pe_working_set_t * data_set)
554 {
555  const char *id = NULL;
556  pe_node_t *node = NULL;
557  xmlNode *lrm_rscs = NULL;
558  xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
559 
560  xmlNode *node_state = NULL;
561 
562  for (node_state = pcmk__xe_first_child(status); node_state != NULL;
563  node_state = pcmk__xe_next(node_state)) {
564 
565  if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE,
566  pcmk__str_none)) {
567  id = crm_element_value(node_state, XML_ATTR_ID);
568  lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
569  lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
570 
571  node = pe_find_node_id(data_set->nodes, id);
572 
573  if (node == NULL) {
574  continue;
575 
576  /* Still need to check actions for a maintenance node to cancel existing monitor operations */
577  } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
578  crm_trace("Skipping param check for %s: can't run resources",
579  node->details->uname);
580  continue;
581  }
582 
583  crm_trace("Processing node %s", node->details->uname);
584  if (node->details->online
585  || pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
586  xmlNode *rsc_entry = NULL;
587 
588  for (rsc_entry = pcmk__xe_first_child(lrm_rscs);
589  rsc_entry != NULL;
590  rsc_entry = pcmk__xe_next(rsc_entry)) {
591 
592  if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
593 
594  if (xml_has_children(rsc_entry)) {
595  GListPtr gIter = NULL;
596  GListPtr result = NULL;
597  const char *rsc_id = ID(rsc_entry);
598 
599  CRM_CHECK(rsc_id != NULL, return);
600 
601  result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
602  for (gIter = result; gIter != NULL; gIter = gIter->next) {
603  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
604 
605  if (rsc->variant != pe_native) {
606  continue;
607  }
608  check_actions_for(rsc_entry, rsc, node, data_set);
609  }
610  g_list_free(result);
611  }
612  }
613  }
614  }
615  }
616  }
617 }
618 
619 static void
620 apply_placement_constraints(pe_working_set_t * data_set)
621 {
622  for (GList *gIter = data_set->placement_constraints;
623  gIter != NULL; gIter = gIter->next) {
624  pe__location_t *cons = gIter->data;
625 
626  cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
627  }
628 }
629 
630 static gboolean
631 failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc)
632 {
633  gboolean rc = FALSE;
634  GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE);
635 
636  if (list) {
637  rc = TRUE;
638  }
639  g_list_free(list);
640  return rc;
641 }
642 
/*!
 * \internal
 * \brief Ban a resource (or its clone parent) from a node when it has failed
 *        there as many times as its migration threshold allows
 *
 * \param[in] rsc       Resource to check
 * \param[in] node      Node to check
 * \param[in] data_set  Cluster working set
 *
 * NOTE(review): two source lines were lost when this copy was extracted
 * (marked inline below); the function does not compile as-is.
 */
static void
check_migration_threshold(pe_resource_t *rsc, pe_node_t *node,
                          pe_working_set_t *data_set)
{
    int fail_count, countdown;
    pe_resource_t *failed;

    /* Migration threshold of 0 means never force away */
    if (rsc->migration_threshold == 0) {
        return;
    }

    // If we're ignoring failures, also ignore the migration threshold
    /* NOTE(review): the condition line (original 664) is missing here —
     * presumably a failure-ignored flag check; confirm against upstream. */
        return;
    }

    /* If there are no failures, there's no need to force away */
    fail_count = pe_get_failcount(node, rsc, NULL,
                                  /* NOTE(review): argument line (original
                                   * 670) missing here */
                                  data_set);
    if (fail_count <= 0) {
        return;
    }

    /* How many more times recovery will be tried on this node */
    countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);

    /* If failed resource has a parent, we'll force the parent away */
    failed = rsc;
    if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
        failed = uber_parent(rsc);
    }

    if (countdown == 0) {
        resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
        crm_warn("Forcing %s away from %s after %d failures (max=%d)",
                 failed->id, node->details->uname, fail_count,
                 rsc->migration_threshold);
    } else {
        crm_info("%s can fail %d more times on %s before being forced off",
                 failed->id, countdown, node->details->uname);
    }
}
695 
/*!
 * \internal
 * \brief Apply resource stickiness and migration-threshold checks for a
 *        resource (recursively for collective resources) on a node
 *
 * \param[in] rsc       Resource to process
 * \param[in] node      Node to process
 * \param[in] data_set  Cluster working set
 *
 * NOTE(review): one source line was lost when this copy was extracted
 * (marked inline below); the function does not compile as-is.
 */
static void
common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
{
    /* Collective resources: recurse into children and stop */
    if (rsc->children) {
        GListPtr gIter = rsc->children;

        for (; gIter != NULL; gIter = gIter->next) {
            pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;

            common_apply_stickiness(child_rsc, node, data_set);
        }
        return;
    }

    /* Stickiness only applies to a managed resource active on exactly one node */
    if (pcmk_is_set(rsc->flags, pe_rsc_managed)
        && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) {
        pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
        pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);

        if (current == NULL) {

        } else if ((match != NULL)
                   /* NOTE(review): condition continuation and opening brace
                    * (original line 718) missing from this copy */
            pe_resource_t *sticky_rsc = rsc;

            resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
            pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
                         " (node=%s, weight=%d)", sticky_rsc->id,
                         node->details->uname, rsc->stickiness);
        } else {
            GHashTableIter iter;
            pe_node_t *nIter = NULL;

            pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
                         " and node %s is not explicitly allowed", rsc->id, node->details->uname);
            g_hash_table_iter_init(&iter, rsc->allowed_nodes);
            while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
                crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
            }
        }
    }

    /* Check the migration threshold only if a failcount clear action
     * has not already been placed for this resource on the node.
     * There is no sense in potentially forcing the resource from this
     * node if the failcount is being reset anyway.
     *
     * @TODO A clear_failcount operation can be scheduled in stage4() via
     * check_actions_for(), or in stage5() via check_params(). This runs in
     * stage2(), so it cannot detect those, meaning we might check the migration
     * threshold when we shouldn't -- worst case, we stop or move the resource,
     * then move it back next transition.
     */
    if (failcount_clear_action_exists(node, rsc) == FALSE) {
        check_migration_threshold(rsc, node, data_set);
    }
}
753 
/* Recursively assign allocation method tables for a resource tree. */
void
/* NOTE(review): the signature line (original 755) and one body line
 * (original 759, presumably the rsc->cmds assignment) were lost when this
 * copy was extracted — per the recursive call below, this is
 * complex_set_cmds(pe_resource_t *rsc); confirm against upstream. */
{
    GListPtr gIter = rsc->children;

    /* Recurse into all children */
    for (; gIter != NULL; gIter = gIter->next) {
        pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;

        complex_set_cmds(child_rsc);
    }
}
767 
/* Assign allocation method tables for every resource in the working set. */
void
/* NOTE(review): the signature line (original 769) was lost when this copy was
 * extracted — per the forward declaration near the top of the file, this is
 * set_alloc_actions(pe_working_set_t *data_set); confirm against upstream. */
{

    GListPtr gIter = data_set->resources;

    for (; gIter != NULL; gIter = gIter->next) {
        pe_resource_t *rsc = (pe_resource_t *) gIter->data;

        complex_set_cmds(rsc);
    }
}
780 
781 static void
782 calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
783 {
784  const char *key = (const char *)gKey;
785  const char *value = (const char *)gValue;
786  int *system_health = (int *)user_data;
787 
788  if (!gKey || !gValue || !user_data) {
789  return;
790  }
791 
792  if (pcmk__starts_with(key, "#health")) {
793  int score;
794 
795  /* Convert the value into an integer */
796  score = char2score(value);
797 
798  /* Add it to the running total */
799  *system_health = pe__add_scores(score, *system_health);
800  }
801 }
802 
/*!
 * \internal
 * \brief Apply the configured node-health strategy to all nodes and resources
 *
 * Translates health strategy settings into red/yellow/green score values,
 * then creates location preferences for every resource on every node with a
 * nonzero combined health score.
 *
 * \param[in] data_set  Cluster working set
 *
 * \return TRUE on success, FALSE if the configured strategy is unknown
 *
 * NOTE(review): a few source lines were lost when this copy was extracted
 * (marked inline below); the function does not compile as-is.
 */
static gboolean
apply_system_health(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
    int base_health = 0;

    if (pcmk__str_eq(health_strategy, "none", pcmk__str_null_matches | pcmk__str_casei)) {
        /* Prevent any accidental health -> score translation */
        pcmk__score_red = 0;
        pcmk__score_yellow = 0;
        pcmk__score_green = 0;
        return TRUE;

    } else if (pcmk__str_eq(health_strategy, "migrate-on-red", pcmk__str_casei)) {

        /* Resources on nodes which have health values of red are
         * weighted away from that node.
         */
        /* NOTE(review): the line setting pcmk__score_red (original 822) is
         * missing from this copy */
        pcmk__score_yellow = 0;
        pcmk__score_green = 0;

    } else if (pcmk__str_eq(health_strategy, "only-green", pcmk__str_casei)) {

        /* Resources on nodes which have health values of red or yellow
         * are forced away from that node.
         */
        /* NOTE(review): the lines setting pcmk__score_red and
         * pcmk__score_yellow (original 831-832) are missing from this copy */
        pcmk__score_green = 0;

    } else if (pcmk__str_eq(health_strategy, "progressive", pcmk__str_casei)) {
        /* Same as the above, but use the r/y/g scores provided by the user
         * Defaults are provided by the pe_prefs table
         * Also, custom health "base score" can be used
         */
        base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");

    } else if (pcmk__str_eq(health_strategy, "custom", pcmk__str_casei)) {

        /* Requires the admin to configure the rsc_location constaints for
         * processing the stored health scores
         */
        /* TODO: Check for the existence of appropriate node health constraints */
        return TRUE;

    } else {
        crm_err("Unknown node health strategy: %s", health_strategy);
        return FALSE;
    }

    crm_info("Applying automated node health strategy: %s", health_strategy);

    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        int system_health = base_health;
        pe_node_t *node = (pe_node_t *) gIter->data;

        /* Search through the node hash table for system health entries. */
        g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);

        crm_info(" Node %s has an combined system health of %d",
                 node->details->uname, system_health);

        /* If the health is non-zero, then create a new rsc2node so that the
         * weight will be added later on.
         */
        if (system_health != 0) {

            GListPtr gIter2 = data_set->resources;

            for (; gIter2 != NULL; gIter2 = gIter2->next) {
                pe_resource_t *rsc = (pe_resource_t *) gIter2->data;

                rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
            }
        }
    }

    return TRUE;
}
884 
/* Stage 0: unpack the CIB into the working set (status, allocation methods,
 * node health, and constraints). */
gboolean
/* NOTE(review): the signature line (original 886) was lost when this copy
 * was extracted — given the pattern of the other stageN entry points below,
 * presumably stage0(pe_working_set_t *data_set); confirm against upstream.
 * Also note get_object_root() is called before the data_set->input NULL
 * check below — verify that call tolerates a NULL input. */
{
    xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);

    if (data_set->input == NULL) {
        return FALSE;
    }

    if (!pcmk_is_set(data_set->flags, pe_flag_have_status)) {
        crm_trace("Calculating status");
        cluster_status(data_set);
    }

    set_alloc_actions(data_set);
    apply_system_health(data_set);
    unpack_constraints(cib_constraints, data_set);

    return TRUE;
}
905 
/*
 * Check nodes for resources started outside of the LRM
 */
gboolean
/* NOTE(review): the signature line (original 910) was lost when this copy was
 * extracted — presumably probe_resources(pe_working_set_t *data_set);
 * confirm against upstream. */
{
    pe_action_t *probe_node_complete = NULL;

    for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        pe_node_t *node = (pe_node_t *) gIter->data;
        const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);

        if (node->details->online == FALSE) {

            /* Fence remote nodes whose connection is unrecoverable */
            if (pe__is_remote_node(node) && node->details->remote_rsc
                && (get_remote_node_state(node) == remote_state_failed)) {

                pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE);
            }
            continue;

        } else if (node->details->unclean) {
            continue;

        } else if (node->details->rsc_discovery_enabled == FALSE) {
            /* resource discovery is disabled for this node */
            continue;
        }

        /* Node explicitly flagged as unprobed: schedule a full reprobe */
        if (probed != NULL && crm_is_true(probed) == FALSE) {
            pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
                                                  CRM_OP_REPROBE, node, FALSE, TRUE, data_set);

            /* NOTE(review): original line 939 is missing here (extraction
             * gap) — probe_op is created but apparently used on the missing
             * line; confirm against upstream. */
            continue;
        }

        for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
            pe_resource_t *rsc = (pe_resource_t *) gIter2->data;

            rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
        }
    }
    return TRUE;
}
951 
952 static void
953 rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node)
954 {
955  GListPtr gIter = rsc->children;
956  pe_resource_t *top = uber_parent(rsc);
957  pe_node_t *match;
958 
959  if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
960  return;
961  }
962 
963  for (; gIter != NULL; gIter = gIter->next) {
964  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
965  rsc_discover_filter(child_rsc, node);
966  }
967 
968  match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
969  if (match && match->rsc_discover_mode != pe_discover_exclusive) {
970  match->weight = -INFINITY;
971  }
972 }
973 
974 static time_t
975 shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
976 {
977  const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
978  time_t result = 0;
979 
980  if (shutdown) {
981  errno = 0;
982  result = (time_t) crm_parse_ll(shutdown, NULL);
983  if (errno != 0) {
984  result = 0;
985  }
986  }
987  return result? result : get_effective_time(data_set);
988 }
989 
/*!
 * \internal
 * \brief Lock a resource to the node it was active on at shutdown, if
 *        shutdown locks are in effect
 *
 * \param[in] rsc       Resource to check (groups are handled recursively)
 * \param[in] data_set  Cluster working set
 *
 * NOTE(review): one source line was lost when this copy was extracted
 * (marked inline below); the function does not compile as-is.
 */
static void
apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
{
    const char *class;

    // Only primitives and (uncloned) groups may be locked
    if (rsc->variant == pe_group) {
        for (GList *item = rsc->children; item != NULL;
             item = item->next) {
            apply_shutdown_lock((pe_resource_t *) item->data, data_set);
        }
    } else if (rsc->variant != pe_native) {
        return;
    }

    // Fence devices and remote connections can't be locked
    class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
    if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
        || pe__resource_is_remote_conn(rsc, data_set)) {
        return;
    }

    if (rsc->lock_node != NULL) {
        // The lock was obtained from resource history

        if (rsc->running_on != NULL) {
            /* The resource was started elsewhere even though it is now
             * considered locked. This shouldn't be possible, but as a
             * failsafe, we don't want to disturb the resource now.
             */
            pe_rsc_info(rsc,
                        "Cancelling shutdown lock because %s is already active",
                        rsc->id);
            pe__clear_resource_history(rsc, rsc->lock_node, data_set);
            rsc->lock_node = NULL;
            rsc->lock_time = 0;
        }

    // Only a resource active on exactly one node can be locked
    } else if (pcmk__list_of_1(rsc->running_on)) {
        pe_node_t *node = rsc->running_on->data;

        if (node->details->shutdown) {
            if (node->details->unclean) {
                pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
                             rsc->id, node->details->uname);
            } else {
                rsc->lock_node = node;
                rsc->lock_time = shutdown_time(node, data_set);
            }
        }
    }

    if (rsc->lock_node == NULL) {
        // No lock needed
        return;
    }

    if (data_set->shutdown_lock > 0) {
        time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock;

        pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
                    rsc->id, rsc->lock_node->details->uname,
                    (long long) lock_expiration);
        pe__update_recheck_time(++lock_expiration, data_set);
    } else {
        pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
                    rsc->id, rsc->lock_node->details->uname);
    }

    // If resource is locked to one node, ban it from all other nodes
    for (GList *item = data_set->nodes; item != NULL; item = item->next) {
        pe_node_t *node = item->data;

        if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
            /* NOTE(review): the line starting the ban call (original 1065)
             * is missing from this copy — presumably a resource_location()
             * call taking the arguments on the next line; confirm against
             * upstream. */
                              XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set);
        }
    }
}
1070 
/*
 * \internal
 * \brief Stage 2 of cluster status: apply node-specific criteria
 *
 * Count known nodes, and apply location constraints, stickiness, and exclusive
 * resource discovery.
 */
gboolean
/* NOTE(review): the signature line (original 1079) was lost when this copy
 * was extracted — given the pattern of the other stageN entry points,
 * presumably stage2(pe_working_set_t *data_set); confirm against upstream. */
{
    GListPtr gIter = NULL;

    /* Apply shutdown locks first, so banned nodes are in place before
     * placement */
    if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
        for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
            apply_shutdown_lock((pe_resource_t *) gIter->data, data_set);
        }
    }

    if (!pcmk_is_set(data_set->flags, pe_flag_no_compat)) {
        // @COMPAT API backward compatibility
        for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
            pe_node_t *node = (pe_node_t *) gIter->data;

            if (node && (node->weight >= 0) && node->details->online
                && (node->details->type != node_ping)) {
                data_set->max_valid_nodes++;
            }
        }
    }

    apply_placement_constraints(data_set);

    /* Apply stickiness and discovery filtering for every resource on every
     * node */
    gIter = data_set->nodes;
    for (; gIter != NULL; gIter = gIter->next) {
        GListPtr gIter2 = NULL;
        pe_node_t *node = (pe_node_t *) gIter->data;

        gIter2 = data_set->resources;
        for (; gIter2 != NULL; gIter2 = gIter2->next) {
            pe_resource_t *rsc = (pe_resource_t *) gIter2->data;

            common_apply_stickiness(rsc, node, data_set);
            rsc_discover_filter(rsc, node);
        }
    }

    return TRUE;
}
1119 
1120 /*
1121  * Create internal resource constraints before allocation
1122  */
1123 gboolean
1125 {
1126 
1127  GListPtr gIter = data_set->resources;
1128 
1129  for (; gIter != NULL; gIter = gIter->next) {
1130  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1131 
1132  rsc->cmds->internal_constraints(rsc, data_set);
1133  }
1134 
1135  return TRUE;
1136 }
1137 
1138 /*
1139  * Check for orphaned or redefined actions
1140  */
1141 gboolean
1143 {
1144  check_actions(data_set);
1145  return TRUE;
1146 }
1147 
/* Strip the const qualifier from a pointer, for APIs that take a non-const
 * argument but do not modify it. Use sparingly.
 */
static void *
convert_const_pointer(const void *ptr)
{
    void *mutable_ptr = (void *) ptr;

    return mutable_ptr;
}
1154 
/*
 * \internal
 * \brief Compare two resources by allocation processing order
 *
 * GCompareDataFunc used to sort data_set->resources when a non-default
 * placement strategy is active. Tie-breaks in order: resource priority,
 * weight on the node each resource is currently running on, then weight on
 * each node of the pre-sorted node list passed via \p data. A negative
 * result means \p a should be allocated before \p b.
 */
static gint
sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
{
    int rc = 0;
    int r1_weight = -INFINITY;
    int r2_weight = -INFINITY;

    // Records which criterion decided the ordering, for trace logging only
    const char *reason = "existence";

    const GListPtr nodes = (GListPtr) data;
    const pe_resource_t *resource1 = a;
    const pe_resource_t *resource2 = b;

    pe_node_t *r1_node = NULL;
    pe_node_t *r2_node = NULL;
    GListPtr gIter = NULL;
    GHashTable *r1_nodes = NULL;
    GHashTable *r2_nodes = NULL;

    // First criterion: higher priority is processed first
    reason = "priority";
    r1_weight = resource1->priority;
    r2_weight = resource2->priority;

    if (r1_weight > r2_weight) {
        rc = -1;
        goto done;
    }

    if (r1_weight < r2_weight) {
        rc = 1;
        goto done;
    }

    reason = "no node list";
    if (nodes == NULL) {
        goto done;
    }

    // Build per-node weight tables including colocation influences
    r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
                                          resource1->id, NULL, NULL, 1,
    /* NOTE(review): the weight-flags argument line was lost in extraction
     * here -- confirm against upstream before relying on this call.
     */
    pe__show_node_weights(true, NULL, resource1->id, r1_nodes);

    r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
                                          resource2->id, NULL, NULL, 1,
    /* NOTE(review): the weight-flags argument line was lost in extraction
     * here -- confirm against upstream before relying on this call.
     */
    pe__show_node_weights(true, NULL, resource2->id, r2_nodes);

    /* Current location score */
    reason = "current location";
    r1_weight = -INFINITY;
    r2_weight = -INFINITY;

    // Second criterion: merged weight on the node each resource runs on
    if (resource1->running_on) {
        r1_node = pe__current_node(resource1);
        r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
        if (r1_node != NULL) {
            r1_weight = r1_node->weight;
        }
    }
    if (resource2->running_on) {
        r2_node = pe__current_node(resource2);
        r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
        if (r2_node != NULL) {
            r2_weight = r2_node->weight;
        }
    }

    if (r1_weight > r2_weight) {
        rc = -1;
        goto done;
    }

    if (r1_weight < r2_weight) {
        rc = 1;
        goto done;
    }

    // Final criterion: compare weights node by node, in the given node order
    reason = "score";
    for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
        pe_node_t *node = (pe_node_t *) gIter->data;

        r1_node = NULL;
        r2_node = NULL;

        r1_weight = -INFINITY;
        if (r1_nodes) {
            r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
        }
        if (r1_node) {
            r1_weight = r1_node->weight;
        }

        r2_weight = -INFINITY;
        if (r2_nodes) {
            r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
        }
        if (r2_node) {
            r2_weight = r2_node->weight;
        }

        if (r1_weight > r2_weight) {
            rc = -1;
            goto done;
        }

        if (r1_weight < r2_weight) {
            rc = 1;
            goto done;
        }
    }

  done:
    crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
              resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
              rc < 0 ? '>' : rc > 0 ? '<' : '=',
              resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);

    // Free the merged weight tables built above
    if (r1_nodes) {
        g_hash_table_destroy(r1_nodes);
    }
    if (r2_nodes) {
        g_hash_table_destroy(r2_nodes);
    }

    return rc;
}
1282 
1283 static void
1284 allocate_resources(pe_working_set_t * data_set)
1285 {
1286  GListPtr gIter = NULL;
1287 
1288  if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
1289  /* Allocate remote connection resources first (which will also allocate
1290  * any colocation dependencies). If the connection is migrating, always
1291  * prefer the partial migration target.
1292  */
1293  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1294  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1295  if (rsc->is_remote_node == FALSE) {
1296  continue;
1297  }
1298  pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
1299  rsc->id);
1300  rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
1301  }
1302  }
1303 
1304  /* now do the rest of the resources */
1305  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1306  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1307  if (rsc->is_remote_node == TRUE) {
1308  continue;
1309  }
1310  pe_rsc_trace(rsc, "Allocating %s resource '%s'",
1311  crm_element_name(rsc->xml), rsc->id);
1312  rsc->cmds->allocate(rsc, NULL, data_set);
1313  }
1314 }
1315 
1316 /* We always use pe_order_preserve with these convenience functions to exempt
1317  * internally generated constraints from the prohibition of user constraints
1318  * involving remote connection resources.
1319  *
1320  * The start ordering additionally uses pe_order_runnable_left so that the
1321  * specified action is not runnable if the start is not runnable.
1322  */
1323 
1324 static inline void
1325 order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action,
1326  enum pe_ordering extra, pe_working_set_t *data_set)
1327 {
1328  if (lh_rsc && rh_action && data_set) {
1329  custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
1330  rh_action->rsc, NULL, rh_action,
1331  pe_order_preserve | pe_order_runnable_left | extra,
1332  data_set);
1333  }
1334 }
1335 
1336 static inline void
1337 order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc,
1338  enum pe_ordering extra, pe_working_set_t *data_set)
1339 {
1340  if (lh_action && rh_rsc && data_set) {
1341  custom_action_order(lh_action->rsc, NULL, lh_action,
1342  rh_rsc, stop_key(rh_rsc), NULL,
1343  pe_order_preserve | extra, data_set);
1344  }
1345 }
1346 
1347 // Clear fail counts for orphaned rsc on all online nodes
1348 static void
1349 cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set)
1350 {
1351  GListPtr gIter = NULL;
1352 
1353  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1354  pe_node_t *node = (pe_node_t *) gIter->data;
1355 
1356  if (node->details->online
1357  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
1358  data_set)) {
1359 
1360  pe_action_t *clear_op = NULL;
1361 
1362  clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
1363  data_set);
1364 
1365  /* We can't use order_action_then_stop() here because its
1366  * pe_order_preserve breaks things
1367  */
1368  custom_action_order(clear_op->rsc, NULL, clear_op,
1369  rsc, stop_key(rsc), NULL,
1370  pe_order_optional, data_set);
1371  }
1372  }
1373 }
1374 
1375 gboolean
1377 {
1378  GListPtr gIter = NULL;
1379  int log_prio = show_utilization? LOG_STDOUT : LOG_TRACE;
1380 
1381  if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) {
1382  GListPtr nodes = g_list_copy(data_set->nodes);
1383 
1384  nodes = sort_nodes_by_weight(nodes, NULL, data_set);
1385  data_set->resources =
1386  g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
1387 
1388  g_list_free(nodes);
1389  }
1390 
1391  gIter = data_set->nodes;
1392  for (; gIter != NULL; gIter = gIter->next) {
1393  pe_node_t *node = (pe_node_t *) gIter->data;
1394 
1395  dump_node_capacity(log_prio, "Original", node);
1396  }
1397 
1398  crm_trace("Allocating services");
1399  /* Take (next) highest resource, assign it and create its actions */
1400 
1401  allocate_resources(data_set);
1402 
1403  gIter = data_set->nodes;
1404  for (; gIter != NULL; gIter = gIter->next) {
1405  pe_node_t *node = (pe_node_t *) gIter->data;
1406 
1407  dump_node_capacity(log_prio, "Remaining", node);
1408  }
1409 
1410  // Process deferred action checks
1411  pe__foreach_param_check(data_set, check_params);
1412  pe__free_param_checks(data_set);
1413 
1414  if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) {
1415  crm_trace("Calculating needed probes");
1416  /* This code probably needs optimization
1417  * ptest -x with 100 nodes, 100 clones and clone-max=100:
1418 
1419  With probes:
1420 
1421  ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
1422  ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
1423  ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
1424  ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
1425  ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
1426  ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
1427  ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
1428  ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
1429  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
1430  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
1431  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
1432  36s
1433  ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
1434 
1435  Without probes:
1436 
1437  ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
1438  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
1439  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
1440  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
1441  ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
1442  ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
1443  ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
1444  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
1445  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
1446  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
1447  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
1448  */
1449 
1450  probe_resources(data_set);
1451  }
1452 
1453  crm_trace("Handle orphans");
1454  if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
1455  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1456  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1457 
1458  /* There's no need to recurse into rsc->children because those
1459  * should just be unallocated clone instances.
1460  */
1461  if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
1462  cleanup_orphans(rsc, data_set);
1463  }
1464  }
1465  }
1466 
1467  crm_trace("Creating actions");
1468 
1469  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1470  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1471 
1472  rsc->cmds->create_actions(rsc, data_set);
1473  }
1474 
1475  crm_trace("Creating done");
1476  return TRUE;
1477 }
1478 
1479 static gboolean
1480 is_managed(const pe_resource_t * rsc)
1481 {
1482  GListPtr gIter = rsc->children;
1483 
1484  if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
1485  return TRUE;
1486  }
1487 
1488  for (; gIter != NULL; gIter = gIter->next) {
1489  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
1490 
1491  if (is_managed(child_rsc)) {
1492  return TRUE;
1493  }
1494  }
1495 
1496  return FALSE;
1497 }
1498 
1499 static gboolean
1500 any_managed_resources(pe_working_set_t * data_set)
1501 {
1502 
1503  GListPtr gIter = data_set->resources;
1504 
1505  for (; gIter != NULL; gIter = gIter->next) {
1506  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1507 
1508  if (is_managed(rsc)) {
1509  return TRUE;
1510  }
1511  }
1512  return FALSE;
1513 }
1514 
/*
 * \internal
 * \brief Schedule a pseudo-fence for a guest node
 *
 * Guest nodes are "fenced" by recovering their container resource rather
 * than via a fencing device; this creates the pseudo-event describing that
 * recovery and orders it relative to the container/connection actions.
 */
static void
fence_guest(pe_node_t *node, pe_working_set_t *data_set)
{
    pe_resource_t *container = node->details->remote_rsc->container;
    pe_action_t *stop = NULL;
    pe_action_t *stonith_op = NULL;

    /* The fence action is just a label; we don't do anything differently for
     * off vs. reboot. We specify it explicitly, rather than let it default to
     * cluster's default action, because we are not _initiating_ fencing -- we
     * are creating a pseudo-event to describe fencing that is already occurring
     * by other means (container recovery).
     */
    const char *fence_action = "off";

    /* Check whether guest's container resource has any explicit stop or
     * start (the stop may be implied by fencing of the guest's host).
     */
    if (container) {
        stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);

        // A scheduled container start means the guest is effectively rebooting
        if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
            fence_action = "reboot";
        }
    }

    /* Create a fence pseudo-event, so we have an event to order actions
     * against, and the controller can always detect it.
     */
    stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set);
    /* NOTE(review): the start of an update_action_flags() call (marking the
     * fence op pseudo/runnable) appears to have been lost in extraction here
     * -- confirm against upstream.
     */
                 __func__, __LINE__);

    /* We want to imply stops/demotes after the guest is stopped, not wait until
     * it is restarted, so we always order pseudo-fencing after stop, not start
     * (even though start might be closer to what is done for a real reboot).
     */
    if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
        pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set);
        crm_info("Implying guest node %s is down (action %d) after %s fencing",
                 node->details->uname, stonith_op->id, stop->node->details->uname);
        order_actions(parent_stonith_op, stonith_op,
        /* NOTE(review): the ordering-flags argument line was lost in
         * extraction -- confirm against upstream.
         */

    } else if (stop) {
        order_actions(stop, stonith_op,
        /* NOTE(review): the ordering-flags argument line was lost in
         * extraction -- confirm against upstream.
         */
        crm_info("Implying guest node %s is down (action %d) "
                 "after container %s is stopped (action %d)",
                 node->details->uname, stonith_op->id,
                 container->id, stop->id);
    } else {
        /* If we're fencing the guest node but there's no stop for the guest
         * resource, we must think the guest is already stopped. However, we may
         * think so because its resource history was just cleaned. To avoid
         * unnecessarily considering the guest node down if it's really up,
         * order the pseudo-fencing after any stop of the connection resource,
         * which will be ordered after any container (re-)probe.
         */
        stop = find_first_action(node->details->remote_rsc->actions, NULL,
                                 RSC_STOP, NULL);

        if (stop) {
            order_actions(stop, stonith_op, pe_order_optional);
            crm_info("Implying guest node %s is down (action %d) "
                     "after connection is stopped (action %d)",
                     node->details->uname, stonith_op->id, stop->id);
        } else {
            /* Not sure why we're fencing, but everything must already be
             * cleanly stopped.
             */
            crm_info("Implying guest node %s is down (action %d) ",
                     node->details->uname, stonith_op->id);
        }
    }

    /* Order/imply other actions relative to pseudo-fence as with real fence */
    pcmk__order_vs_fence(stonith_op, data_set);
}
1601 
1602 /*
1603  * Create dependencies for stonith and shutdown operations
1604  */
gboolean
/* NOTE(review): the function name line -- presumably
 * stage6(pe_working_set_t *data_set) -- was lost in extraction.
 */
{
    pe_action_t *dc_down = NULL;
    pe_action_t *stonith_op = NULL;
    gboolean integrity_lost = FALSE;
    gboolean need_stonith = TRUE;
    GListPtr gIter;
    GListPtr stonith_ops = NULL;    // non-DC fencing actions, newest first
    GList *shutdown_ops = NULL;     // non-DC shutdown actions

    /* Remote ordering constraints need to happen prior to calculating fencing
     * because it is one more place we will mark the node as dirty.
     *
     * A nice side effect of doing them early is that apply_*_ordering() can be
     * simpler because pe_fence_node() has already done some of the work.
     */
    crm_trace("Creating remote ordering constraints");
    apply_remote_node_ordering(data_set);

    crm_trace("Processing fencing and shutdown cases");
    if (any_managed_resources(data_set) == FALSE) {
        crm_notice("Delaying fencing operations until there are resources to manage");
        need_stonith = FALSE;
    }

    /* Check each node for stonith/shutdown */
    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        pe_node_t *node = (pe_node_t *) gIter->data;

        /* Guest nodes are "fenced" by recovering their container resource,
         * so handle them separately.
         */
        if (pe__is_guest_node(node)) {
            if (node->details->remote_requires_reset && need_stonith
                && pe_can_fence(data_set, node)) {
                fence_guest(node, data_set);
            }
            continue;
        }

        stonith_op = NULL;

        if (node->details->unclean
            && need_stonith && pe_can_fence(data_set, node)) {

            stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set);
            pe_warn("Scheduling Node %s for STONITH", node->details->uname);

            // Order other actions relative to this node's fencing
            pcmk__order_vs_fence(stonith_op, data_set);

            if (node->details->is_dc) {
                // Remember if the DC is being fenced
                dc_down = stonith_op;

            } else {

                /* NOTE(review): the condition line preceding this one --
                 * presumably checking that concurrent fencing is disabled --
                 * was lost in extraction.
                 */
                    && (stonith_ops != NULL)) {
                    /* Concurrent fencing is disabled, so order each non-DC
                     * fencing in a chain. If there is any DC fencing or
                     * shutdown, it will be ordered after the last action in the
                     * chain later.
                     */
                    order_actions((pe_action_t *) stonith_ops->data,
                                  stonith_op, pe_order_optional);
                }

                // Remember all non-DC fencing actions in a separate list
                stonith_ops = g_list_prepend(stonith_ops, stonith_op);
            }

        } else if (node->details->online && node->details->shutdown &&
                   /* TODO define what a shutdown op means for a remote node.
                    * For now we do not send shutdown operations for remote nodes, but
                    * if we can come up with a good use for this in the future, we will. */
                   pe__is_guest_or_remote_node(node) == FALSE) {

            pe_action_t *down_op = sched_shutdown_op(node, data_set);

            if (node->details->is_dc) {
                // Remember if the DC is being shut down
                dc_down = down_op;
            } else {
                // Remember non-DC shutdowns for later ordering
                shutdown_ops = g_list_prepend(shutdown_ops, down_op);
            }
        }

        // Unclean but unfenceable nodes compromise cluster integrity
        if (node->details->unclean && stonith_op == NULL) {
            integrity_lost = TRUE;
            pe_warn("Node %s is unclean!", node->details->uname);
        }
    }

    if (integrity_lost) {
        if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
            pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
            pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");

        } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
            crm_notice("Cannot fence unclean nodes until quorum is"
                       " attained (or no-quorum-policy is set to ignore)");
        }
    }

    if (dc_down != NULL) {
        /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
         * DC elections. However, we don't want to order non-DC shutdowns before
         * a DC *fencing*, because even though we don't want a node that's
         * shutting down to become DC, the DC fencing could be ordered before a
         * clone stop that's also ordered before the shutdowns, thus leading to
         * a graph loop.
         */
        if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
            for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
                pe_action_t *node_stop = (pe_action_t *) gIter->data;

                crm_debug("Ordering shutdown on %s before %s on DC %s",
                          node_stop->node->details->uname,
                          dc_down->task, dc_down->node->details->uname);

                order_actions(node_stop, dc_down, pe_order_optional);
            }
        }

        // Order any non-DC fencing before any DC fencing or shutdown

        if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) {
            /* With concurrent fencing, order each non-DC fencing action
             * separately before any DC fencing or shutdown.
             */
            for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
                order_actions((pe_action_t *) gIter->data, dc_down,
                /* NOTE(review): the ordering-flags argument line was lost in
                 * extraction -- confirm against upstream.
                 */
            }
        } else if (stonith_ops) {
            /* Without concurrent fencing, the non-DC fencing actions are
             * already ordered relative to each other, so we just need to order
             * the DC fencing after the last action in the chain (which is the
             * first item in the list).
             */
            order_actions((pe_action_t *) stonith_ops->data, dc_down,
            /* NOTE(review): the ordering-flags argument line was lost in
             * extraction -- confirm against upstream.
             */
        }
    }
    g_list_free(stonith_ops);
    g_list_free(shutdown_ops);
    return TRUE;
}
1755 
1756 /*
1757  * Determine the sets of independent actions and the correct order for the
1758  * actions in each set.
1759  *
1760  * Mark dependencies of un-runnable actions un-runnable
1761  *
1762  */
1763 static GListPtr
1764 find_actions_by_task(GListPtr actions, pe_resource_t * rsc, const char *original_key)
1765 {
1766  GListPtr list = NULL;
1767 
1768  list = find_actions(actions, original_key, NULL);
1769  if (list == NULL) {
1770  /* we're potentially searching a child of the original resource */
1771  char *key = NULL;
1772  char *task = NULL;
1773  guint interval_ms = 0;
1774 
1775  if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
1776  key = pcmk__op_key(rsc->id, task, interval_ms);
1777  list = find_actions(actions, key, NULL);
1778 
1779  } else {
1780  crm_err("search key: %s", original_key);
1781  }
1782 
1783  free(key);
1784  free(task);
1785  }
1786 
1787  return list;
1788 }
1789 
/*
 * \internal
 * \brief Create orderings from lh_action to the "then" (RH) side of an
 *        ordering constraint
 *
 * \param[in] lh_action  "First" action (may be NULL)
 * \param[in] rsc        Resource whose actions form the RH side
 * \param[in] order      Ordering constraint being processed
 */
static void
rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
               pe__ordering_t *order)
{
    GListPtr gIter = NULL;
    GListPtr rh_actions = NULL;
    pe_action_t *rh_action = NULL;
    enum pe_ordering type;

    CRM_CHECK(rsc != NULL, return);
    CRM_CHECK(order != NULL, return);

    type = order->type;
    rh_action = order->rh_action;
    crm_trace("Processing RH of ordering constraint %d", order->id);

    if (rh_action != NULL) {
        // The constraint names a specific action
        rh_actions = g_list_prepend(NULL, rh_action);

    } else if (rsc != NULL) {
        // Otherwise, find matching actions by task key
        rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
    }

    if (rh_actions == NULL) {
        pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
                     " ignoring", rsc->id, order->rh_action_task);
        if (lh_action) {
            pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
        }
        return;
    }

    if ((lh_action != NULL) && (lh_action->rsc == rsc)
        && pcmk_is_set(lh_action->flags, pe_action_dangle)) {

        pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
                     order->rh_action_task);
        /* NOTE(review): a statement handling the dangling case was lost in
         * extraction here -- confirm against upstream.
         */
    }

    gIter = rh_actions;
    for (; gIter != NULL; gIter = gIter->next) {
        pe_action_t *rh_action_iter = (pe_action_t *) gIter->data;

        if (lh_action) {
            order_actions(lh_action, rh_action_iter, type);

        } else if (type & pe_order_implies_then) {
            /* NOTE(review): the start of an update_action_flags() call
             * (presumably clearing runnability) was lost in extraction here
             * -- confirm against upstream.
             */
                          __func__, __LINE__);
            crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
        } else {
            crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
        }
    }

    g_list_free(rh_actions);
}
1848 
/*
 * \internal
 * \brief Create orderings for the "first" (LH) side of an ordering constraint
 *
 * Finds the LH actions (creating one if necessary), then orders each one
 * relative to the constraint's RH side via rsc_order_then() or
 * order_actions().
 */
static void
rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
                pe_working_set_t *data_set)
{
    GListPtr gIter = NULL;
    GListPtr lh_actions = NULL;
    pe_action_t *lh_action = order->lh_action;
    pe_resource_t *rh_rsc = order->rh_rsc;

    crm_trace("Processing LH of ordering constraint %d", order->id);
    CRM_ASSERT(lh_rsc != NULL);

    if (lh_action != NULL) {
        // The constraint names a specific LH action
        lh_actions = g_list_prepend(NULL, lh_action);

    } else {
        // Otherwise, find matching actions by task key
        lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
    }

    // No matching LH action exists yet; decide whether to create one
    if (lh_actions == NULL && lh_rsc != rh_rsc) {
        char *key = NULL;
        char *op_type = NULL;
        guint interval_ms = 0;

        parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms);
        key = pcmk__op_key(lh_rsc->id, op_type, interval_ms);

        // A stop of an already-stopped resource would be a no-op: skip
        if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) {
            free(key);
            pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                         lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);

        // Likewise a demote of a resource already in slave role
        } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) {
            free(key);
            pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                         lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);

        } else {
            // Create the missing LH action (custom_action takes ownership
            // of key, so it is not freed on this path)
            pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
                         lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
            lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
            lh_actions = g_list_prepend(NULL, lh_action);
        }

        free(op_type);
    }

    gIter = lh_actions;
    for (; gIter != NULL; gIter = gIter->next) {
        pe_action_t *lh_action_iter = (pe_action_t *) gIter->data;

        // Infer the RH resource from the RH action when not given explicitly
        if (rh_rsc == NULL && order->rh_action) {
            rh_rsc = order->rh_action->rsc;
        }
        if (rh_rsc) {
            rsc_order_then(lh_action_iter, rh_rsc, order);

        } else if (order->rh_action) {
            order_actions(lh_action_iter, order->rh_action, order->type);
        }
    }

    g_list_free(lh_actions);
}
1913 
1915  pe_working_set_t *data_set);
1916 
// Return nonzero if the action has a positive (recurring) interval
static int
is_recurring_action(pe_action_t *action)
{
    guint interval_ms;

    if (pcmk__guint_from_hash(action->meta,
    /* NOTE(review): the hash-key/default argument line was lost in
     * extraction here -- confirm against upstream.
     */
                              &interval_ms) != pcmk_rc_ok) {
        return 0;
    }
    return (interval_ms > 0);
}
1929 
/*
 * \internal
 * \brief Order an action on a guest node relative to the guest's container
 *        and connection resources
 */
static void
apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set)
{
    /* VMs are also classified as containers for these purposes... in
     * that they both involve a 'thing' running on a real or remote
     * cluster node.
     *
     * This allows us to be smarter about the type and extent of
     * recovery actions required in various scenarios
     */
    pe_resource_t *remote_rsc = NULL;
    pe_resource_t *container = NULL;
    enum action_tasks task = text2task(action->task);

    CRM_ASSERT(action->rsc);
    CRM_ASSERT(action->node);
    /* NOTE(review): an additional assertion line was lost in extraction
     * here -- confirm against upstream.
     */

    remote_rsc = action->node->details->remote_rsc;
    CRM_ASSERT(remote_rsc);

    container = remote_rsc->container;
    CRM_ASSERT(container);

    // A failed container means the guest node requires (pseudo-)fencing
    if (pcmk_is_set(container->flags, pe_rsc_failed)) {
        pe_fence_node(data_set, action->node, "container failed", FALSE);
    }

    crm_trace("Order %s action %s relative to %s%s for %s%s",
              action->task, action->uuid,
              pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
              remote_rsc->id,
              pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "",
              container->id);

    /* NOTE(review): the condition line opening this block (matching
     * migration operations) was lost in extraction -- confirm against
     * upstream.
     */
        /* Migration ops map to "no_action", but we need to apply the same
         * ordering as for stop or demote (see get_router_node()).
         */
        task = stop_rsc;
    }

    switch (task) {
        case start_rsc:
        case action_promote:
            /* Force resource recovery if the container is recovered */
            order_start_then_action(container, action, pe_order_implies_then,
                                    data_set);

            /* Wait for the connection resource to be up too */
            order_start_then_action(remote_rsc, action, pe_order_none,
                                    data_set);
            break;

        case stop_rsc:
        case action_demote:
            if (pcmk_is_set(container->flags, pe_rsc_failed)) {
                /* When the container representing a guest node fails, any stop
                 * or demote actions for resources running on the guest node
                 * are implied by the container stopping. This is similar to
                 * how fencing operations work for cluster nodes and remote
                 * nodes.
                 */
            } else {
                /* Ensure the operation happens before the connection is brought
                 * down.
                 *
                 * If we really wanted to, we could order these after the
                 * connection start, IFF the container's current role was
                 * stopped (otherwise we re-introduce an ordering loop when the
                 * connection is restarting).
                 */
                order_action_then_stop(action, remote_rsc, pe_order_none,
                                       data_set);
            }
            break;

        default:
            /* Wait for the connection resource to be up */
            if (is_recurring_action(action)) {
                /* In case we ever get the recovery logic wrong, force
                 * recurring monitors to be restarted, even if just
                 * the connection was re-established
                 */
                if(task != no_action) {
                    order_start_then_action(remote_rsc, action,
                                            pe_order_implies_then, data_set);
                }
            } else {
                order_start_then_action(remote_rsc, action, pe_order_none,
                                        data_set);
            }
            break;
    }
}
2025 
2026 static enum remote_connection_state
2027 get_remote_node_state(pe_node_t *node)
2028 {
2029  pe_resource_t *remote_rsc = NULL;
2030  pe_node_t *cluster_node = NULL;
2031 
2032  CRM_ASSERT(node);
2033 
2034  remote_rsc = node->details->remote_rsc;
2035  CRM_ASSERT(remote_rsc);
2036 
2037  cluster_node = pe__current_node(remote_rsc);
2038 
2039  /* If the cluster node the remote connection resource resides on
2040  * is unclean or went offline, we can't process any operations
2041  * on that remote node until after it starts elsewhere.
2042  */
2043  if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
2044  /* The connection resource is not going to run anywhere */
2045 
2046  if (cluster_node && cluster_node->details->unclean) {
2047  /* The remote connection is failed because its resource is on a
2048  * failed node and can't be recovered elsewhere, so we must fence.
2049  */
2050  return remote_state_failed;
2051  }
2052 
2053  if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) {
2054  /* Connection resource is cleanly stopped */
2055  return remote_state_stopped;
2056  }
2057 
2058  /* Connection resource is failed */
2059 
2060  if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
2061  && remote_rsc->remote_reconnect_ms
2062  && node->details->remote_was_fenced
2063  && !pe__shutdown_requested(node)) {
2064 
2065  /* We won't know whether the connection is recoverable until the
2066  * reconnect interval expires and we reattempt connection.
2067  */
2068  return remote_state_unknown;
2069  }
2070 
2071  /* The remote connection is in a failed state. If there are any
2072  * resources known to be active on it (stop) or in an unknown state
2073  * (probe), we must assume the worst and fence it.
2074  */
2075  return remote_state_failed;
2076 
2077  } else if (cluster_node == NULL) {
2078  /* Connection is recoverable but not currently running anywhere, see if we can recover it first */
2079  return remote_state_unknown;
2080 
2081  } else if(cluster_node->details->unclean == TRUE
2082  || cluster_node->details->online == FALSE) {
2083  /* Connection is running on a dead node, see if we can recover it first */
2084  return remote_state_resting;
2085 
2086  } else if (pcmk__list_of_multiple(remote_rsc->running_on)
2087  && remote_rsc->partial_migration_source
2088  && remote_rsc->partial_migration_target) {
2089  /* We're in the middle of migrating a connection resource,
2090  * wait until after the resource migrates before performing
2091  * any actions.
2092  */
2093  return remote_state_resting;
2094 
2095  }
2096  return remote_state_alive;
2097 }
2098 
2103 static void
2104 apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set)
2105 {
2106  pe_resource_t *remote_rsc = NULL;
2107  enum action_tasks task = text2task(action->task);
2108  enum remote_connection_state state = get_remote_node_state(action->node);
2109 
2110  enum pe_ordering order_opts = pe_order_none;
2111 
2112  if (action->rsc == NULL) {
2113  return;
2114  }
2115 
2116  CRM_ASSERT(action->node);
2118 
2119  remote_rsc = action->node->details->remote_rsc;
2120  CRM_ASSERT(remote_rsc);
2121 
2122  crm_trace("Order %s action %s relative to %s%s (state: %s)",
2123  action->task, action->uuid,
2124  pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
2125  remote_rsc->id, state2text(state));
2126 
2128  /* Migration ops map to "no_action", but we need to apply the same
2129  * ordering as for stop or demote (see get_router_node()).
2130  */
2131  task = stop_rsc;
2132  }
2133 
2134  switch (task) {
2135  case start_rsc:
2136  case action_promote:
2137  order_opts = pe_order_none;
2138 
2139  if (state == remote_state_failed) {
2140  /* Force recovery, by making this action required */
2142  }
2143 
2144  /* Ensure connection is up before running this action */
2145  order_start_then_action(remote_rsc, action, order_opts, data_set);
2146  break;
2147 
2148  case stop_rsc:
2149  if(state == remote_state_alive) {
2150  order_action_then_stop(action, remote_rsc,
2151  pe_order_implies_first, data_set);
2152 
2153  } else if(state == remote_state_failed) {
2154  /* The resource is active on the node, but since we don't have a
2155  * valid connection, the only way to stop the resource is by
2156  * fencing the node. There is no need to order the stop relative
2157  * to the remote connection, since the stop will become implied
2158  * by the fencing.
2159  */
2160  pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE);
2161 
2162  } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
2163  /* State must be remote_state_unknown or remote_state_stopped.
2164  * Since the connection is not coming back up in this
2165  * transition, stop this resource first.
2166  */
2167  order_action_then_stop(action, remote_rsc,
2168  pe_order_implies_first, data_set);
2169 
2170  } else {
2171  /* The connection is going to be started somewhere else, so
2172  * stop this resource after that completes.
2173  */
2174  order_start_then_action(remote_rsc, action, pe_order_none, data_set);
2175  }
2176  break;
2177 
2178  case action_demote:
2179  /* Only order this demote relative to the connection start if the
2180  * connection isn't being torn down. Otherwise, the demote would be
2181  * blocked because the connection start would not be allowed.
2182  */
2183  if(state == remote_state_resting || state == remote_state_unknown) {
2184  order_start_then_action(remote_rsc, action, pe_order_none,
2185  data_set);
2186  } /* Otherwise we can rely on the stop ordering */
2187  break;
2188 
2189  default:
2190  /* Wait for the connection resource to be up */
2191  if (is_recurring_action(action)) {
2192  /* In case we ever get the recovery logic wrong, force
2193  * recurring monitors to be restarted, even if just
2194  * the connection was re-established
2195  */
2196  order_start_then_action(remote_rsc, action,
2197  pe_order_implies_then, data_set);
2198 
2199  } else {
2200  pe_node_t *cluster_node = pe__current_node(remote_rsc);
2201 
2202  if(task == monitor_rsc && state == remote_state_failed) {
2203  /* We would only be here if we do not know the
2204  * state of the resource on the remote node.
2205  * Since we have no way to find out, it is
2206  * necessary to fence the node.
2207  */
2208  pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE);
2209  }
2210 
2211  if(cluster_node && state == remote_state_stopped) {
2212  /* The connection is currently up, but is going
2213  * down permanently.
2214  *
2215  * Make sure we check services are actually
2216  * stopped _before_ we let the connection get
2217  * closed
2218  */
2219  order_action_then_stop(action, remote_rsc,
2220  pe_order_runnable_left, data_set);
2221 
2222  } else {
2223  order_start_then_action(remote_rsc, action, pe_order_none,
2224  data_set);
2225  }
2226  }
2227  break;
2228  }
2229 }
2230 
2231 static void
2232 apply_remote_node_ordering(pe_working_set_t *data_set)
2233 {
2234  if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
2235  return;
2236  }
2237 
2238  for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2239  pe_action_t *action = (pe_action_t *) gIter->data;
2240  pe_resource_t *remote = NULL;
2241 
2242  // We are only interested in resource actions
2243  if (action->rsc == NULL) {
2244  continue;
2245  }
2246 
2247  /* Special case: If we are clearing the failcount of an actual
2248  * remote connection resource, then make sure this happens before
2249  * any start of the resource in this transition.
2250  */
2251  if (action->rsc->is_remote_node &&
2252  pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) {
2253 
2255  NULL,
2256  action,
2257  action->rsc,
2258  pcmk__op_key(action->rsc->id, RSC_START, 0),
2259  NULL,
2261  data_set);
2262 
2263  continue;
2264  }
2265 
2266  // We are only interested in actions allocated to a node
2267  if (action->node == NULL) {
2268  continue;
2269  }
2270 
2271  if (!pe__is_guest_or_remote_node(action->node)) {
2272  continue;
2273  }
2274 
2275  /* We are only interested in real actions.
2276  *
2277  * @TODO This is probably wrong; pseudo-actions might be converted to
2278  * real actions and vice versa later in update_actions() at the end of
2279  * stage7().
2280  */
2281  if (pcmk_is_set(action->flags, pe_action_pseudo)) {
2282  continue;
2283  }
2284 
2285  remote = action->node->details->remote_rsc;
2286  if (remote == NULL) {
2287  // Orphaned
2288  continue;
2289  }
2290 
2291  /* Another special case: if a resource is moving to a Pacemaker Remote
2292  * node, order the stop on the original node after any start of the
2293  * remote connection. This ensures that if the connection fails to
2294  * start, we leave the resource running on the original node.
2295  */
2296  if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) {
2297  for (GList *item = action->rsc->actions; item != NULL;
2298  item = item->next) {
2299  pe_action_t *rsc_action = item->data;
2300 
2301  if ((rsc_action->node->details != action->node->details)
2302  && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) {
2303  custom_action_order(remote, start_key(remote), NULL,
2304  action->rsc, NULL, rsc_action,
2305  pe_order_optional, data_set);
2306  }
2307  }
2308  }
2309 
2310  /* The action occurs across a remote connection, so create
2311  * ordering constraints that guarantee the action occurs while the node
2312  * is active (after start, before stop ... things like that).
2313  *
2314  * This is somewhat brittle in that we need to make sure the results of
2315  * this ordering are compatible with the result of get_router_node().
2316  * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
2317  * of this logic rather than action2xml().
2318  */
2319  if (remote->container) {
2320  crm_trace("Container ordering for %s", action->uuid);
2321  apply_container_ordering(action, data_set);
2322 
2323  } else {
2324  crm_trace("Remote ordering for %s", action->uuid);
2325  apply_remote_ordering(action, data_set);
2326  }
2327  }
2328 }
2329 
2330 static gboolean
2331 order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
2332 {
2333  /* No need to probe the resource on the node that is being
2334  * unfenced. Otherwise it might introduce transition loop
2335  * since probe will be performed after the node is
2336  * unfenced.
2337  */
2338  if (pcmk__str_eq(rh_action->task, CRM_OP_FENCE, pcmk__str_casei)
2339  && probe->node && rh_action->node
2340  && probe->node->details == rh_action->node->details) {
2341  const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
2342 
2343  if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
2344  return TRUE;
2345  }
2346  }
2347 
2348  // Shutdown waits for probe to complete only if it's on the same node
2349  if ((pcmk__str_eq(rh_action->task, CRM_OP_SHUTDOWN, pcmk__str_casei))
2350  && probe->node && rh_action->node
2351  && probe->node->details != rh_action->node->details) {
2352  return TRUE;
2353  }
2354  return FALSE;
2355 }
2356 
2357 static void
2358 order_first_probes_imply_stops(pe_working_set_t * data_set)
2359 {
2360  GListPtr gIter = NULL;
2361 
2362  for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2363  pe__ordering_t *order = gIter->data;
2364  enum pe_ordering order_type = pe_order_optional;
2365 
2366  pe_resource_t *lh_rsc = order->lh_rsc;
2367  pe_resource_t *rh_rsc = order->rh_rsc;
2368  pe_action_t *lh_action = order->lh_action;
2369  pe_action_t *rh_action = order->rh_action;
2370  const char *lh_action_task = order->lh_action_task;
2371  const char *rh_action_task = order->rh_action_task;
2372 
2373  GListPtr probes = NULL;
2374  GListPtr rh_actions = NULL;
2375 
2376  GListPtr pIter = NULL;
2377 
2378  if (lh_rsc == NULL) {
2379  continue;
2380 
2381  } else if (rh_rsc && lh_rsc == rh_rsc) {
2382  continue;
2383  }
2384 
2385  if (lh_action == NULL && lh_action_task == NULL) {
2386  continue;
2387  }
2388 
2389  if (rh_action == NULL && rh_action_task == NULL) {
2390  continue;
2391  }
2392 
2393  /* Technically probe is expected to return "not running", which could be
2394  * the alternative of stop action if the status of the resource is
2395  * unknown yet.
2396  */
2397  if (lh_action && !pcmk__str_eq(lh_action->task, RSC_STOP, pcmk__str_casei)) {
2398  continue;
2399 
2400  } else if (lh_action == NULL
2401  && lh_action_task
2402  && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) {
2403  continue;
2404  }
2405 
2406  /* Do not probe the resource inside of a stopping container. Otherwise
2407  * it might introduce transition loop since probe will be performed
2408  * after the container starts again.
2409  */
2410  if (rh_rsc && lh_rsc->container == rh_rsc) {
2411  if (rh_action && pcmk__str_eq(rh_action->task, RSC_STOP, pcmk__str_casei)) {
2412  continue;
2413 
2414  } else if (rh_action == NULL && rh_action_task
2415  && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) {
2416  continue;
2417  }
2418  }
2419 
2420  if (order->type == pe_order_none) {
2421  continue;
2422  }
2423 
2424  // Preserve the order options for future filtering
2426  pe__set_order_flags(order_type,
2428  }
2429 
2430  if (pcmk_is_set(order->type, pe_order_same_node)) {
2432  }
2433 
2434  // Keep the order types for future filtering
2435  if (order->type == pe_order_anti_colocation
2436  || order->type == pe_order_load) {
2437  order_type = order->type;
2438  }
2439 
2440  probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE);
2441  if (probes == NULL) {
2442  continue;
2443  }
2444 
2445  if (rh_action) {
2446  rh_actions = g_list_prepend(rh_actions, rh_action);
2447 
2448  } else if (rh_rsc && rh_action_task) {
2449  rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
2450  }
2451 
2452  if (rh_actions == NULL) {
2453  g_list_free(probes);
2454  continue;
2455  }
2456 
2457  crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
2458  " (id=%d, type=%.6x)",
2459  lh_action ? lh_action->uuid : lh_action_task,
2460  rh_action ? rh_action->uuid : rh_action_task,
2461  order->id, order->type);
2462 
2463  for (pIter = probes; pIter != NULL; pIter = pIter->next) {
2464  pe_action_t *probe = (pe_action_t *) pIter->data;
2465  GListPtr rIter = NULL;
2466 
2467  for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
2468  pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;
2469 
2470  if (order_first_probe_unneeded(probe, rh_action_iter)) {
2471  continue;
2472  }
2473  order_actions(probe, rh_action_iter, order_type);
2474  }
2475  }
2476 
2477  g_list_free(rh_actions);
2478  g_list_free(probes);
2479  }
2480 }
2481 
2482 static void
2483 order_first_probe_then_restart_repromote(pe_action_t * probe,
2484  pe_action_t * after,
2485  pe_working_set_t * data_set)
2486 {
2487  GListPtr gIter = NULL;
2488  bool interleave = FALSE;
2489  pe_resource_t *compatible_rsc = NULL;
2490 
2491  if (probe == NULL
2492  || probe->rsc == NULL
2493  || probe->rsc->variant != pe_native) {
2494  return;
2495  }
2496 
2497  if (after == NULL
2498  // Avoid running into any possible loop
2499  || pcmk_is_set(after->flags, pe_action_tracking)) {
2500  return;
2501  }
2502 
2503  if (!pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
2504  return;
2505  }
2506 
2508 
2509  crm_trace("Processing based on %s %s -> %s %s",
2510  probe->uuid,
2511  probe->node ? probe->node->details->uname: "",
2512  after->uuid,
2513  after->node ? after->node->details->uname : "");
2514 
2515  if (after->rsc
2516  /* Better not build a dependency directly with a clone/group.
2517  * We are going to proceed through the ordering chain and build
2518  * dependencies with its children.
2519  */
2520  && after->rsc->variant == pe_native
2521  && probe->rsc != after->rsc) {
2522 
2523  GListPtr then_actions = NULL;
2524  enum pe_ordering probe_order_type = pe_order_optional;
2525 
2526  if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
2527  then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE);
2528 
2529  } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
2530  then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE);
2531  }
2532 
2533  for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
2534  pe_action_t *then = (pe_action_t *) gIter->data;
2535 
2536  // Skip any pseudo action which for example is implied by fencing
2537  if (pcmk_is_set(then->flags, pe_action_pseudo)) {
2538  continue;
2539  }
2540 
2541  order_actions(probe, then, probe_order_type);
2542  }
2543  g_list_free(then_actions);
2544  }
2545 
2546  if (after->rsc
2547  && after->rsc->variant > pe_group) {
2548  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
2550 
2551  interleave = crm_is_true(interleave_s);
2552 
2553  if (interleave) {
2554  /* For an interleaved clone, we should build a dependency only
2555  * with the relevant clone child.
2556  */
2557  compatible_rsc = find_compatible_child(probe->rsc,
2558  after->rsc,
2560  FALSE, data_set);
2561  }
2562  }
2563 
2564  for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
2565  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data;
2566  /* pe_order_implies_then is the reason why a required A.start
2567  * implies/enforces B.start to be required too, which is the cause of
2568  * B.restart/re-promote.
2569  *
2570  * Not sure about pe_order_implies_then_on_node though. It's now only
2571  * used for unfencing case, which tends to introduce transition
2572  * loops...
2573  */
2574 
2575  if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
2576  /* The order type between a group/clone and its child such as
2577  * B.start-> B_child.start is:
2578  * pe_order_implies_first_printed | pe_order_runnable_left
2579  *
2580  * Proceed through the ordering chain and build dependencies with
2581  * its children.
2582  */
2583  if (after->rsc == NULL
2584  || after->rsc->variant < pe_group
2585  || probe->rsc->parent == after->rsc
2586  || after_wrapper->action->rsc == NULL
2587  || after_wrapper->action->rsc->variant > pe_group
2588  || after->rsc != after_wrapper->action->rsc->parent) {
2589  continue;
2590  }
2591 
2592  /* Proceed to the children of a group or a non-interleaved clone.
2593  * For an interleaved clone, proceed only to the relevant child.
2594  */
2595  if (after->rsc->variant > pe_group
2596  && interleave == TRUE
2597  && (compatible_rsc == NULL
2598  || compatible_rsc != after_wrapper->action->rsc)) {
2599  continue;
2600  }
2601  }
2602 
2603  crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
2604  after->uuid,
2605  after->node ? after->node->details->uname: "",
2606  after_wrapper->action->uuid,
2607  after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
2608  after_wrapper->type);
2609 
2610  order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2611  }
2612 }
2613 
2614 static void clear_actions_tracking_flag(pe_working_set_t * data_set)
2615 {
2616  GListPtr gIter = NULL;
2617 
2618  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2619  pe_action_t *action = (pe_action_t *) gIter->data;
2620 
2621  if (pcmk_is_set(action->flags, pe_action_tracking)) {
2623  }
2624  }
2625 }
2626 
2627 static void
2628 order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
2629 {
2630  GListPtr gIter = NULL;
2631  GListPtr probes = NULL;
2632 
2633  for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
2634  pe_resource_t * child = (pe_resource_t *) gIter->data;
2635 
2636  order_first_rsc_probes(child, data_set);
2637  }
2638 
2639  if (rsc->variant != pe_native) {
2640  return;
2641  }
2642 
2643  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
2644 
2645  for (gIter = probes; gIter != NULL; gIter= gIter->next) {
2646  pe_action_t *probe = (pe_action_t *) gIter->data;
2647  GListPtr aIter = NULL;
2648 
2649  for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
2650  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data;
2651 
2652  order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2653  clear_actions_tracking_flag(data_set);
2654  }
2655  }
2656 
2657  g_list_free(probes);
2658 }
2659 
2660 static void
2661 order_first_probes(pe_working_set_t * data_set)
2662 {
2663  GListPtr gIter = NULL;
2664 
2665  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2666  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2667 
2668  order_first_rsc_probes(rsc, data_set);
2669  }
2670 
2671  order_first_probes_imply_stops(data_set);
2672 }
2673 
/* Ordering other actions _before_ probes ("A then B" => wait for A before
 * probing B) is deliberately disabled: as the commentary inside explains, it
 * readily creates ordering loops when resources move. The function is kept as
 * an empty hook so order_probes() has a stable call site if the logic is ever
 * revived.
 */
static void
order_then_probes(pe_working_set_t * data_set)
{
#if 0
    GListPtr gIter = NULL;

    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        pe_resource_t *rsc = (pe_resource_t *) gIter->data;

        /* Given "A then B", we would prefer to wait for A to be
         * started before probing B.
         *
         * If A was a filesystem on which the binaries and data for B
         * lived, it would have been useful if the author of B's agent
         * could assume that A is running before B.monitor will be
         * called.
         *
         * However we can't _only_ probe once A is running, otherwise
         * we'd not detect the state of B if A could not be started
         * for some reason.
         *
         * In practice however, we cannot even do an opportunistic
         * version of this because B may be moving:
         *
         * B.probe -> B.start
         * B.probe -> B.stop
         * B.stop -> B.start
         * A.stop -> A.start
         * A.start -> B.probe
         *
         * So far so good, but if we add the result of this code:
         *
         * B.stop -> A.stop
         *
         * Then we get a loop:
         *
         * B.probe -> B.stop -> A.stop -> A.start -> B.probe
         *
         * We could kill the 'B.probe -> B.stop' dependency, but that
         * could mean stopping B "too" soon, because B.start must wait
         * for the probes to complete.
         *
         * Another option is to allow it only if A is a non-unique
         * clone with clone-max == node-max (since we'll never be
         * moving it). However, we could still be stopping one
         * instance at the same time as starting another.

         * The complexity of checking for allowed conditions combined
         * with the ever narrowing usecase suggests that this code
         * should remain disabled until someone gets smarter.
         */
        pe_action_t *start = NULL;
        GListPtr actions = NULL;
        GListPtr probes = NULL;

        actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);

        if (actions) {
            start = actions->data;
            g_list_free(actions);
        }

        if(start == NULL) {
            crm_err("No start action for %s", rsc->id);
            continue;
        }

        probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);

        for (actions = start->actions_before; actions != NULL; actions = actions->next) {
            pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;

            GListPtr pIter = NULL;
            pe_action_t *first = before->action;
            pe_resource_t *first_rsc = first->rsc;

            if(first->required_runnable_before) {
                GListPtr clone_actions = NULL;
                for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
                    before = (pe_action_wrapper_t *) clone_actions->data;

                    crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);

                    CRM_ASSERT(before->action->rsc);
                    first_rsc = before->action->rsc;
                    break;
                }

            } else if(!pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
                crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
            }

            if(first_rsc == NULL) {
                continue;

            } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
                crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
                continue;

            } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
                crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
                continue;
            }

            crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);

            for (pIter = probes; pIter != NULL; pIter = pIter->next) {
                pe_action_t *probe = (pe_action_t *) pIter->data;

                crm_err("Ordering %s before %s", first->uuid, probe->uuid);
                order_actions(first, probe, pe_order_optional);
            }
        }
    }
#endif
}
2790 
/* Create all probe-related orderings: probes that must run before other
 * actions, and (currently disabled) actions that should precede probes.
 */
static void
order_probes(pe_working_set_t * data_set)
{
    order_first_probes(data_set);
    order_then_probes(data_set);  // no-op: implementation is #if 0'd out
}
2797 
2798 gboolean
2800 {
2801  GList *gIter = NULL;
2802 
2803  crm_trace("Applying ordering constraints");
2804 
2805  /* Don't ask me why, but apparently they need to be processed in
2806  * the order they were created in... go figure
2807  *
2808  * Also g_list_append() has horrendous performance characteristics
2809  * So we need to use g_list_prepend() and then reverse the list here
2810  */
2811  data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
2812 
2813  for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2814  pe__ordering_t *order = gIter->data;
2815  pe_resource_t *rsc = order->lh_rsc;
2816 
2817  crm_trace("Applying ordering constraint: %d", order->id);
2818 
2819  if (rsc != NULL) {
2820  crm_trace("rsc_action-to-*");
2821  rsc_order_first(rsc, order, data_set);
2822  continue;
2823  }
2824 
2825  rsc = order->rh_rsc;
2826  if (rsc != NULL) {
2827  crm_trace("action-to-rsc_action");
2828  rsc_order_then(order->lh_action, rsc, order);
2829 
2830  } else {
2831  crm_trace("action-to-action");
2832  order_actions(order->lh_action, order->rh_action, order->type);
2833  }
2834  }
2835 
2836  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2837  pe_action_t *action = (pe_action_t *) gIter->data;
2838 
2839  update_colo_start_chain(action, data_set);
2840  }
2841 
2842  crm_trace("Ordering probes");
2843  order_probes(data_set);
2844 
2845  crm_trace("Updating %d actions", g_list_length(data_set->actions));
2846  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2847  pe_action_t *action = (pe_action_t *) gIter->data;
2848 
2849  update_action(action, data_set);
2850  }
2851 
2852  // Check for invalid orderings
2853  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2854  pe_action_t *action = (pe_action_t *) gIter->data;
2855  pe_action_wrapper_t *input = NULL;
2856 
2857  for (GList *input_iter = action->actions_before;
2858  input_iter != NULL; input_iter = input_iter->next) {
2859 
2860  input = (pe_action_wrapper_t *) input_iter->data;
2861  if (pcmk__ordering_is_invalid(action, input)) {
2862  input->type = pe_order_none;
2863  }
2864  }
2865  }
2866 
2867  LogNodeActions(data_set, FALSE);
2868  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2869  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2870 
2871  LogActions(rsc, data_set, FALSE);
2872  }
2873  return TRUE;
2874 }
2875 
2876 static int transition_id = -1;
2877 
/*!
 * \internal
 * \brief Log a summary of the just-calculated transition
 *
 * Chooses log severity based on whether errors or warnings were raised during
 * scheduling, and points the administrator at crm_verify on config errors.
 *
 * \param[in] filename  File where the transition's input CIB was saved
 */
void
pcmk__log_transition_summary(const char *filename)
{
    // was_processing_error/was_processing_warning are set during scheduling
    if (was_processing_error) {
        crm_err("Calculated transition %d (with errors), saving inputs in %s",
                transition_id, filename);

    } else if (was_processing_warning) {
        crm_warn("Calculated transition %d (with warnings), saving inputs in %s",
                 transition_id, filename);

    } else {
        crm_notice("Calculated transition %d, saving inputs in %s",
                   transition_id, filename);
    }
    if (pcmk__config_error) {
        crm_notice("Configuration errors found during scheduler processing,"
                   "  please run \"crm_verify -L\" to identify issues");
    }
}
2904 
2905 /*
2906  * Create a dependency graph to send to the transitioner (via the controller)
2907  */
2908 gboolean
2910 {
2911  GListPtr gIter = NULL;
2912  const char *value = NULL;
2913 
2914  transition_id++;
2915  crm_trace("Creating transition graph %d.", transition_id);
2916 
2917  data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
2918 
2919  value = pe_pref(data_set->config_hash, "cluster-delay");
2920  crm_xml_add(data_set->graph, "cluster-delay", value);
2921 
2922  value = pe_pref(data_set->config_hash, "stonith-timeout");
2923  crm_xml_add(data_set->graph, "stonith-timeout", value);
2924 
2925  crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
2926 
2927  if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) {
2928  crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
2929  } else {
2930  crm_xml_add(data_set->graph, "failed-start-offset", "1");
2931  }
2932 
2933  value = pe_pref(data_set->config_hash, "batch-limit");
2934  crm_xml_add(data_set->graph, "batch-limit", value);
2935 
2936  crm_xml_add_int(data_set->graph, "transition_id", transition_id);
2937 
2938  value = pe_pref(data_set->config_hash, "migration-limit");
2939  if (crm_parse_ll(value, NULL) > 0) {
2940  crm_xml_add(data_set->graph, "migration-limit", value);
2941  }
2942 
2943  if (data_set->recheck_by > 0) {
2944  char *recheck_epoch = NULL;
2945 
2946  recheck_epoch = crm_strdup_printf("%llu",
2947  (long long) data_set->recheck_by);
2948  crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
2949  free(recheck_epoch);
2950  }
2951 
2952 /* errors...
2953  slist_iter(action, pe_action_t, action_list, lpc,
2954  if(action->optional == FALSE && action->runnable == FALSE) {
2955  print_action("Ignoring", action, TRUE);
2956  }
2957  );
2958 */
2959 
2960  /* The following code will de-duplicate action inputs, so nothing past this
2961  * should rely on the action input type flags retaining their original
2962  * values.
2963  */
2964 
2965  gIter = data_set->resources;
2966  for (; gIter != NULL; gIter = gIter->next) {
2967  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2968 
2969  pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
2970  rsc->cmds->expand(rsc, data_set);
2971  }
2972 
2973  crm_log_xml_trace(data_set->graph, "created resource-driven action list");
2974 
2975  /* pseudo action to distribute list of nodes with maintenance state update */
2976  add_maintenance_update(data_set);
2977 
2978  /* catch any non-resource specific actions */
2979  crm_trace("processing non-resource actions");
2980 
2981  gIter = data_set->actions;
2982  for (; gIter != NULL; gIter = gIter->next) {
2983  pe_action_t *action = (pe_action_t *) gIter->data;
2984 
2985  if (action->rsc
2986  && action->node
2987  && action->node->details->shutdown
2988  && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)
2989  && !pcmk_any_flags_set(action->flags,
2991  && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)
2992  ) {
2993  /* Eventually we should just ignore the 'fence' case
2994  * But for now it's the best way to detect (in CTS) when
2995  * CIB resource updates are being lost
2996  */
2997  if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)
2998  || data_set->no_quorum_policy == no_quorum_ignore) {
2999  crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
3000  action->node->details->unclean ? "fence" : "shut down",
3001  action->node->details->uname, action->rsc->id,
3002  pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged",
3003  pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "",
3004  action->uuid);
3005  }
3006  }
3007 
3008  graph_element_from_action(action, data_set);
3009  }
3010 
3011  crm_log_xml_trace(data_set->graph, "created generic action list");
3012  crm_trace("Created transition graph %d.", transition_id);
3013 
3014  return TRUE;
3015 }
3016 
3017 void
3018 LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
3019 {
3020  GListPtr gIter = NULL;
3021 
3022  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
3023  char *node_name = NULL;
3024  char *task = NULL;
3025  pe_action_t *action = (pe_action_t *) gIter->data;
3026 
3027  if (action->rsc != NULL) {
3028  continue;
3029  } else if (pcmk_is_set(action->flags, pe_action_optional)) {
3030  continue;
3031  }
3032 
3033  if (pe__is_guest_node(action->node)) {
3034  node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
3035  } else if(action->node) {
3036  node_name = crm_strdup_printf("%s", action->node->details->uname);
3037  }
3038 
3039 
3040  if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
3041  task = strdup("Shutdown");
3042  } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
3043  const char *op = g_hash_table_lookup(action->meta, "stonith_action");
3044  task = crm_strdup_printf("Fence (%s)", op);
3045  }
3046 
3047  if(task == NULL) {
3048  /* Nothing to report */
3049  } else if(terminal && action->reason) {
3050  printf(" * %s %s '%s'\n", task, node_name, action->reason);
3051  } else if(terminal) {
3052  printf(" * %s %s\n", task, node_name);
3053  } else if(action->reason) {
3054  crm_notice(" * %s %s '%s'\n", task, node_name, action->reason);
3055  } else {
3056  crm_notice(" * %s %s\n", task, node_name);
3057  }
3058 
3059  free(node_name);
3060  free(task);
3061  }
3062 }
pe_action_t * pe_cancel_op(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set)
#define LOG_TRACE
Definition: logging.h:36
void pe__foreach_param_check(pe_working_set_t *data_set, void(*cb)(pe_resource_t *, pe_node_t *, xmlNode *, enum pe_check_parameters, pe_working_set_t *))
Definition: remote.c:246
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:215
#define pe__clear_action_flags_as(function, line, action, flags_to_clear)
Definition: internal.h:102
GListPtr nodes
Definition: pe_types.h:148
void complex_set_cmds(pe_resource_t *rsc)
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:447
void group_append_meta(pe_resource_t *rsc, xmlNode *xml)
enum pe_action_flags clone_action_flags(pe_action_t *action, pe_node_t *node)
pe_action_t * lh_action
Definition: internal.h:181
gboolean stage7(pe_working_set_t *data_set)
enum pe_quorum_policy no_quorum_policy
Definition: pe_types.h:140
#define RSC_STOP
Definition: crm.h:199
void clone_append_meta(pe_resource_t *rsc, xmlNode *xml)
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
A dumping ground.
gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
Definition: operations.c:184
#define crm_notice(fmt, args...)
Definition: logging.h:349
#define CRMD_ACTION_MIGRATED
Definition: crm.h:169
#define pe_flag_stop_action_orphans
Definition: pe_types.h:103
bool pe__is_guest_or_remote_node(const pe_node_t *node)
Definition: remote.c:41
GHashTable * attrs
Definition: pe_types.h:223
enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node)
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:19
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK
Definition: msg_xml.h:356
#define crm_crit(fmt, args...)
Definition: logging.h:346
#define INFINITY
Definition: crm.h:95
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:59
#define CRM_OP_FENCE
Definition: crm.h:141
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:367
GList * sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set)
pe_check_parameters
Definition: pe_types.h:184
#define XML_TAG_GRAPH
Definition: msg_xml.h:295
#define stop_action(rsc, node, optional)
Definition: internal.h:417
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set)
Definition: remote.c:220
pe_resource_t * container
Definition: pe_types.h:367
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:842
pe_node_t * partial_migration_source
Definition: pe_types.h:352
gboolean stage3(pe_working_set_t *data_set)
#define pe_flag_concurrent_fencing
Definition: pe_types.h:100
#define XML_ATTR_TYPE
Definition: msg_xml.h:99
void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
Definition: utils.c:2727
#define XML_CIB_TAG_CONSTRAINTS
Definition: msg_xml.h:150
#define CRM_OP_REPROBE
Definition: crm.h:150
resource_alloc_functions_t * cmds
Definition: pe_types.h:320
#define pe_flag_symmetric_cluster
Definition: pe_types.h:94
bool pcmk__ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input)
gboolean stage4(pe_working_set_t *data_set)
xmlNode * get_object_root(const char *object_type, xmlNode *the_root)
Definition: cib_utils.c:146
void clone_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
#define pe_flag_no_compat
Definition: pe_types.h:125
xmlNode * xml
Definition: pe_types.h:310
pe_resource_t * rsc
Definition: pe_types.h:391
enum rsc_role_e next_role
Definition: pe_types.h:358
#define pe__show_node_weights(level, rsc, text, nodes)
Definition: internal.h:393
void add_maintenance_update(pe_working_set_t *data_set)
const char * crm_xml_add_int(xmlNode *node, const char *name, int value)
Create an XML attribute with specified name and integer value.
Definition: nvpair.c:425
gboolean exclusive_discover
Definition: pe_types.h:339
int char2score(const char *score)
Definition: utils.c:61
pe_resource_t * remote_rsc
Definition: pe_types.h:219
pe_action_t * sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set)
GHashTable * meta
Definition: pe_types.h:360
#define pe_rsc_unique
Definition: pe_types.h:243
resource_object_functions_t * fns
Definition: pe_types.h:319
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:234
void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite)
Definition: utils.c:2620
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:317
remote_connection_state
void ReloadRsc(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set)
int crm_parse_int(const char *text, const char *default_text)
Parse an integer value from a string.
Definition: strings.c:134
void pcmk__bundle_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
GListPtr resources
Definition: pe_types.h:149
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1713
gboolean unpack_constraints(xmlNode *xml_constraints, pe_working_set_t *data_set)
#define pe_flag_have_status
Definition: pe_types.h:112
gboolean stage2(pe_working_set_t *data_set)
void group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
enum action_tasks text2task(const char *task)
Definition: common.c:358
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1830
gboolean stage5(pe_working_set_t *data_set)
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:312
pe_action_t * rh_action
Definition: internal.h:186
void(* internal_constraints)(pe_resource_t *, pe_working_set_t *)
void resource_location(pe_resource_t *rsc, pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1669
xmlNode * params_restart
Definition: internal.h:519
enum pe_graph_flags native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
void native_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:232
void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
pe_resource_t * rsc_lh
Definition: internal.h:169
pe_node_t * partial_migration_target
Definition: pe_types.h:351
#define RSC_START
Definition: crm.h:196
int migration_threshold
Definition: pe_types.h:331
pe_node_t * allocated_to
Definition: pe_types.h:350
pe_action_t * action
Definition: pe_types.h:510
GHashTable * pcmk__native_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags)
gboolean remote_was_fenced
Definition: pe_types.h:214
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:504
gboolean can_run_resources(const pe_node_t *node)
#define pe_flag_have_quorum
Definition: pe_types.h:93
bool pe__bundle_needs_remote_name(pe_resource_t *rsc)
Definition: bundle.c:957
#define CRM_SCORE_INFINITY
Definition: crm.h:81
gboolean remote_requires_reset
Definition: pe_types.h:213
pe_node_t * pe_find_node_id(GListPtr node_list, const char *id)
Definition: status.c:419
const char * action
Definition: pcmk_fence.c:30
#define pe__set_resource_flags(resource, flags_to_set)
Definition: internal.h:47
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:2549
pe_node_t * lock_node
Definition: pe_types.h:371
#define CRMD_ACTION_START
Definition: crm.h:171
void group_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
gboolean is_dc
Definition: pe_types.h:210
void native_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
GListPtr placement_constraints
Definition: pe_types.h:150
bool pe__is_remote_node(const pe_node_t *node)
Definition: remote.c:25
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:267
pe__location_t * rsc2node_new(const char *id, pe_resource_t *rsc, int weight, const char *discovery_mode, pe_node_t *node, pe_working_set_t *data_set)
void native_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
#define CRMD_ACTION_STOP
Definition: crm.h:174
#define CRM_OP_CLEAR_FAILCOUNT
Definition: crm.h:151
#define pe_warn(fmt...)
Definition: internal.h:27
int weight
Definition: pe_types.h:230
#define pe_flag_have_remote_nodes
Definition: pe_types.h:113
#define crm_warn(fmt, args...)
Definition: logging.h:348
guint remote_reconnect_ms
Definition: pe_types.h:332
void LogActions(pe_resource_t *rsc, pe_working_set_t *data_set, gboolean terminal)
void native_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean(* create_probe)(pe_resource_t *, pe_node_t *, pe_action_t *, gboolean, pe_working_set_t *)
void clone_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
pe_action_flags
Definition: pe_types.h:279
int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val, guint *result)
Definition: strings.c:290
int rc
Definition: pcmk_fence.c:35
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:615
#define pe_rsc_failed
Definition: pe_types.h:255
#define crm_debug(fmt, args...)
Definition: logging.h:352
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:799
#define pe_flag_sanitized
Definition: pe_types.h:116
#define pe__clear_order_flags(order_flags, flags_to_clear)
Definition: internal.h:118
pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define XML_CIB_ATTR_SHUTDOWN
Definition: msg_xml.h:253
#define XML_ATTR_ID
Definition: msg_xml.h:96
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:523
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:107
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:165
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
gboolean stage0(pe_working_set_t *data_set)
#define stop_key(rsc)
Definition: internal.h:416
enum pe_graph_flags group_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
#define pe_rsc_start_pending
Definition: pe_types.h:257
char * task
Definition: pe_types.h:395
void group_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
gboolean update_action(pe_action_t *action, pe_working_set_t *data_set)
#define pe__clear_action_flags(action, flags_to_clear)
Definition: internal.h:68
int custom_action_order(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set)
#define crm_trace(fmt, args...)
Definition: logging.h:353
enum rsc_digest_cmp_val rc
Definition: internal.h:516
void pe_fence_node(pe_working_set_t *data_set, pe_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:97
char * digest_secure_calc
Definition: internal.h:521
void calculate_active_ops(GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2191
GHashTable * meta
Definition: pe_types.h:405
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:196
GListPtr find_actions(GListPtr input, const char *key, const pe_node_t *on_node)
Definition: utils.c:1533
struct pe_node_shared_s * details
Definition: pe_types.h:233
GListPtr running_on
Definition: pe_types.h:353
pe_node_t * node
Definition: pe_types.h:392
bool pe__shutdown_requested(pe_node_t *node)
Definition: utils.c:2712
#define XML_AGENT_ATTR_PROVIDER
Definition: msg_xml.h:237
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1883
unsigned long long flags
Definition: pe_types.h:335
const char * uname
Definition: pe_types.h:198
void(* expand)(pe_resource_t *, pe_working_set_t *)
GHashTable * pcmk__group_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags)
GListPtr actions
Definition: pe_types.h:155
Wrappers for and extensions to libxml2.
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:370
GHashTable * config_hash
Definition: pe_types.h:142
char * clone_name
Definition: pe_types.h:309
xmlNode * find_rsc_op_entry(pe_resource_t *rsc, const char *key)
Definition: utils.c:1366
pe_resource_t * lh_rsc
Definition: internal.h:180
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:663
time_t lock_time
Definition: pe_types.h:372
time_t recheck_by
Definition: pe_types.h:178
void pcmk__log_transition_summary(const char *filename)
#define pe_flag_stonith_enabled
Definition: pe_types.h:97
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:629
gboolean native_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
enum pe_ordering type
Definition: internal.h:177
char * uuid
Definition: pe_types.h:396
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:283
void group_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
enum pe_obj_types variant
Definition: pe_types.h:317
gboolean xml_has_children(const xmlNode *root)
Definition: xml.c:2010
xmlNode * input
Definition: pe_types.h:128
const char * placement_strategy
Definition: pe_types.h:135
int rsc_discover_mode
Definition: pe_types.h:234
xmlNode * params_all
Definition: internal.h:517
GListPtr actions
Definition: pe_types.h:346
#define CRM_OP_SHUTDOWN
Definition: crm.h:140
void pe__free_param_checks(pe_working_set_t *data_set)
Definition: remote.c:261
const char * id
Definition: pe_types.h:197
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: operations.c:44
gboolean pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
guint shutdown_lock
Definition: pe_types.h:180
GListPtr ordering_constraints
Definition: pe_types.h:151
bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node)
Definition: utils.c:87
int pcmk__score_green
Definition: utils.c:57
pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set)
#define PCMK_RESOURCE_CLASS_STONITH
Definition: services.h:52
gboolean rsc_discovery_enabled
Definition: pe_types.h:212
#define XML_LRM_ATTR_SECURE_DIGEST
Definition: msg_xml.h:284
void group_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
enum rsc_role_e(* state)(const pe_resource_t *, gboolean)
Definition: pe_types.h:52
gboolean show_utilization
void dump_node_capacity(int level, const char *comment, pe_node_t *node)
Definition: utils.c:350
gboolean probe_resources(pe_working_set_t *data_set)
enum pe_action_flags group_action_flags(pe_action_t *action, pe_node_t *node)
void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set)
long long crm_parse_ll(const char *text, const char *default_text)
Parse a long long integer value from a string.
Definition: strings.c:107
void LogNodeActions(pe_working_set_t *data_set, gboolean terminal)
Cluster status and scheduling.
GListPtr children
Definition: pe_types.h:364
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:573
void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
int pcmk__score_red
Definition: utils.c:56
void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
void clone_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:233
int pe__add_scores(int score1, int score2)
Definition: common.c:510
#define crm_err(fmt, args...)
Definition: logging.h:347
#define CRM_ASSERT(expr)
Definition: results.h:42
#define RSC_STATUS
Definition: crm.h:210
pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:360
Cluster Configuration.
op_digest_cache_t * rsc_action_digest_cmp(pe_resource_t *rsc, xmlNode *xml_op, pe_node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2148
#define RSC_PROMOTE
Definition: crm.h:202
gboolean cluster_status(pe_working_set_t *data_set)
Definition: status.c:70
int pcmk__score_yellow
Definition: utils.c:58
void(* rsc_location)(pe_resource_t *, pe__location_t *)
GListPtr actions_after
Definition: pe_types.h:429
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:265
#define crm_log_xml_info(xml, text)
Definition: logging.h:359
#define DIMOF(a)
Definition: crm.h:57
gboolean stage8(pe_working_set_t *data_set)
void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:279
#define CRMD_ACTION_MIGRATE
Definition: crm.h:168
gboolean shutdown
Definition: pe_types.h:208
char data[0]
Definition: internal.h:90
#define crm_str(x)
Definition: logging.h:373
void pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean clone_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
void native_append_meta(pe_resource_t *rsc, xmlNode *xml)
void(* create_actions)(pe_resource_t *, pe_working_set_t *)
bool pcmk__config_error
Definition: utils.c:52
#define pe_flag_stdout
Definition: pe_types.h:117
enum pe_action_flags flags
Definition: pe_types.h:400
gboolean maintenance
Definition: pe_types.h:211
#define CRM_OP_PROBED
Definition: crm.h:149
#define pe_rsc_maintenance
Definition: pe_types.h:264
#define pe_rsc_failure_ignored
Definition: pe_types.h:263
pe_resource_t * rh_rsc
Definition: internal.h:185
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:146
gboolean update_action_flags(pe_action_t *action, enum pe_action_flags flags, const char *source, int line)
bool pe__resource_is_remote_conn(const pe_resource_t *rsc, const pe_working_set_t *data_set)
Definition: remote.c:17
#define crm_log_xml_trace(xml, text)
Definition: logging.h:361
gboolean crm_is_true(const char *s)
Definition: strings.c:392
bool pcmk__starts_with(const char *str, const char *prefix)
Check whether a string starts with a certain sequence.
Definition: strings.c:452
CRM_TRACE_INIT_DATA(pacemaker)
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:235
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:20
#define pe__set_order_flags(order_flags, flags_to_set)
Definition: internal.h:111
#define start_key(rsc)
Definition: internal.h:422
pe_action_t * find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t *on_node)
Definition: utils.c:1503
#define ID(x)
Definition: msg_xml.h:425
unsigned long long flags
Definition: pe_types.h:137
#define pe_err(fmt...)
Definition: internal.h:22
gboolean was_processing_error
Definition: common.c:20
int stickiness
Definition: pe_types.h:328
#define XML_RSC_ATTR_INTERLEAVE
Definition: msg_xml.h:192
#define PCMK__OP_FMT
Definition: internal.h:204
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: utils.c:1621
pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2787
gboolean stage6(pe_working_set_t *data_set)
resource_alloc_functions_t resource_class_alloc_functions[]
gboolean was_processing_warning
Definition: common.c:21
void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
enum pe_ordering type
Definition: pe_types.h:508
gboolean unclean
Definition: pe_types.h:206
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
#define LOG_STDOUT
Definition: logging.h:41
GList * GListPtr
Definition: crm.h:214
#define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear)
Definition: internal.h:84
#define pe_flag_start_failure_fatal
Definition: pe_types.h:106
enum node_type type
Definition: pe_types.h:199
gboolean DeleteRsc(pe_resource_t *rsc, pe_node_t *node, gboolean optional, pe_working_set_t *data_set)
void group_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
void pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
void graph_element_from_action(pe_action_t *action, pe_working_set_t *data_set)
#define crm_info(fmt, args...)
Definition: logging.h:350
enum pe_action_flags native_action_flags(pe_action_t *action, pe_node_t *node)
#define pe_rsc_managed
Definition: pe_types.h:238
#define pe_rsc_orphan
Definition: pe_types.h:237
enum pe_graph_flags pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
void pcmk__bundle_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
void set_alloc_actions(pe_working_set_t *data_set)
void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml)
pe_ordering
Definition: pe_types.h:464
gboolean online
Definition: pe_types.h:202
uint64_t flags
Definition: remote.c:149
int pe_get_failcount(pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:251
action_tasks
Definition: common.h:62
pe_resource_t * parent
Definition: pe_types.h:315
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: utils.c:2443
#define pe_flag_shutdown_lock
Definition: pe_types.h:109
enum crm_ais_msg_types type
Definition: internal.h:83
#define RSC_DEMOTE
Definition: crm.h:204
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:18
void pcmk__bundle_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
void native_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
pe_node_t * pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define XML_AGENT_ATTR_CLASS
Definition: msg_xml.h:236
xmlNode * graph
Definition: pe_types.h:167
char * id
Definition: pe_types.h:308
GHashTable * allowed_nodes
Definition: pe_types.h:355
pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:465
#define RSC_MIGRATED
Definition: crm.h:194
#define pe_flag_startup_probes
Definition: pe_types.h:111
pe_node_t * pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define pe_flag_stop_rsc_orphans
Definition: pe_types.h:102
#define pe__set_action_flags_as(function, line, action, flags_to_set)
Definition: internal.h:93