pacemaker  2.1.0-7c3f660
Scalable High-Availability cluster resource manager
pcmk_sched_allocate.c
1 /*
2  * Copyright 2004-2021 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <sys/param.h>
13 
14 #include <crm/crm.h>
15 #include <crm/cib.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/common/xml_internal.h>
19 
20 #include <glib.h>
21 
22 #include <crm/pengine/status.h>
23 #include <pacemaker-internal.h>
24 
25 CRM_TRACE_INIT_DATA(pacemaker);
26 
27 extern bool pcmk__is_daemon;
28 
29 void set_alloc_actions(pe_working_set_t * data_set);
30 extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set);
31 extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set);
32 static void apply_remote_node_ordering(pe_working_set_t *data_set);
33 static enum remote_connection_state get_remote_node_state(pe_node_t *node);
34 
35 enum remote_connection_state {
36     remote_state_unknown = 0,
37     remote_state_alive = 1,
38     remote_state_resting = 2,
39     remote_state_failed = 3,
40     remote_state_stopped = 4,
41 };
42 
43 static const char *
44 state2text(enum remote_connection_state state)
45 {
46     switch (state) {
47         case remote_state_unknown:
48             return "unknown";
49         case remote_state_alive:
50             return "alive";
51         case remote_state_resting:
52             return "resting";
53         case remote_state_failed:
54             return "failed";
55         case remote_state_stopped:
56             return "stopped";
57     }
58 
59  return "impossible";
60 }
61 
62 resource_alloc_functions_t resource_class_alloc_functions[] = {
63     {   /* allocation methods for primitive (native) resources ... */
76     },
77     {   /* allocation methods for groups ... */
90     },
91     {   /* allocation methods for clones ... */
102         clone_expand,
104     },
105     {   /* allocation methods for bundles ... */
118     }
119 };
120 
121 gboolean
122 update_action_flags(pe_action_t * action, enum pe_action_flags flags, const char *source, int line)
123 {
124  static unsigned long calls = 0;
125  gboolean changed = FALSE;
126  gboolean clear = pcmk_is_set(flags, pe_action_clear);
127  enum pe_action_flags last = action->flags;
128 
129  if (clear) {
130  pe__clear_action_flags_as(source, line, action, flags);
131  } else {
132  pe__set_action_flags_as(source, line, action, flags);
133  }
134 
135  if (last != action->flags) {
136  calls++;
137  changed = TRUE;
138  /* Useful for tracking down _who_ changed a specific flag */
139  /* CRM_ASSERT(calls != 534); */
140  pe__clear_raw_action_flags(flags, "action update", pe_action_clear);
141  crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
142  action->uuid, action->node ? action->node->details->uname : "[none]",
143  clear ? "un-" : "", flags, last, action->flags, calls, source);
144  }
145 
146  return changed;
147 }
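/* Minimal usage sketch (hypothetical caller, not part of this file): clearing
 * the "runnable" bit through the wrapper above.  Passing pe_action_clear
 * along with the flag selects the clearing branch, and the caller's location
 * is recorded for the trace message.
 */
#if 0
update_action_flags(action, pe_action_runnable|pe_action_clear,
                    __func__, __LINE__);
#endif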
148 
149 static gboolean
150 check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry,
151  gboolean active_here, pe_working_set_t * data_set)
152 {
153  int attr_lpc = 0;
154  gboolean force_restart = FALSE;
155  gboolean delete_resource = FALSE;
156  gboolean changed = FALSE;
157 
158  const char *value = NULL;
159  const char *old_value = NULL;
160 
161     const char *attr_list[] = {
162         XML_ATTR_TYPE,
163         XML_AGENT_ATTR_CLASS,
164         XML_AGENT_ATTR_PROVIDER
165     };
166 
167  for (; attr_lpc < PCMK__NELEM(attr_list); attr_lpc++) {
168  value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
169  old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
170  if (value == old_value /* i.e. NULL */
171  || pcmk__str_eq(value, old_value, pcmk__str_none)) {
172  continue;
173  }
174 
175  changed = TRUE;
176  trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
177  if (active_here) {
178  force_restart = TRUE;
179  crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
180  rsc->id, node->details->uname, attr_list[attr_lpc],
181  crm_str(old_value), crm_str(value));
182  }
183  }
184  if (force_restart) {
185  /* make sure the restart happens */
186         stop_action(rsc, node, FALSE);
187         pe__set_resource_flags(rsc, pe_rsc_start_pending);
188         delete_resource = TRUE;
189 
190  } else if (changed) {
191  delete_resource = TRUE;
192  }
193  return delete_resource;
194 }
195 
196 static void
197 CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node,
198  const char *reason, pe_working_set_t * data_set)
199 {
200  guint interval_ms = 0;
201  pe_action_t *cancel = NULL;
202 
203  const char *task = NULL;
204  const char *call_id = NULL;
205 
206  CRM_CHECK(xml_op != NULL, return);
207  CRM_CHECK(active_node != NULL, return);
208 
209  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
210  call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
211  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
212 
213  crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s",
214  rsc->id, task, interval_ms,
215  active_node->details->uname, (reason? reason : "unknown"));
216 
217  cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set);
218  add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
219  custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
220 }
221 
222 static gboolean
223 check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op,
224  pe_working_set_t * data_set)
225 {
226  char *key = NULL;
227  guint interval_ms = 0;
228  const op_digest_cache_t *digest_data = NULL;
229  gboolean did_change = FALSE;
230 
231  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
232  const char *digest_secure = NULL;
233 
234  CRM_CHECK(active_node != NULL, return FALSE);
235 
236  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
237  if (interval_ms > 0) {
238  xmlNode *op_match = NULL;
239 
240  /* we need to reconstruct the key because of the way we used to construct resource IDs */
241  key = pcmk__op_key(rsc->id, task, interval_ms);
242 
243  pe_rsc_trace(rsc, "Checking parameters for %s", key);
244  op_match = find_rsc_op_entry(rsc, key);
245 
246         if ((op_match == NULL)
247             && pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)) {
248             CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
249  free(key);
250  return TRUE;
251 
252  } else if (op_match == NULL) {
253  pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
254  free(key);
255  return TRUE;
256  }
257  free(key);
258  key = NULL;
259  }
260 
261  crm_trace("Testing " PCMK__OP_FMT " on %s",
262  rsc->id, task, interval_ms, active_node->details->uname);
263  if ((interval_ms == 0) && pcmk__str_eq(task, RSC_STATUS, pcmk__str_casei)) {
264  /* Reload based on the start action not a probe */
265  task = RSC_START;
266 
267  } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_MIGRATED, pcmk__str_casei)) {
268  /* Reload based on the start action not a migrate */
269  task = RSC_START;
270  } else if ((interval_ms == 0) && pcmk__str_eq(task, RSC_PROMOTE, pcmk__str_casei)) {
271  /* Reload based on the start action not a promote */
272  task = RSC_START;
273  }
274 
275  digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
276 
277  if (pcmk_is_set(data_set->flags, pe_flag_sanitized)) {
278  digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
279  }
280 
281  if(digest_data->rc != RSC_DIGEST_MATCH
282  && digest_secure
283  && digest_data->digest_secure_calc
284  && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
285  if (!pcmk__is_daemon && data_set->priv != NULL) {
286  pcmk__output_t *out = data_set->priv;
287  out->info(out, "Only 'private' parameters to "
288  PCMK__OP_FMT " on %s changed: %s", rsc->id, task,
289                       interval_ms, active_node->details->uname,
290                       crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
291         }
292 
293  } else if (digest_data->rc == RSC_DIGEST_RESTART) {
294  /* Changes that force a restart */
295  pe_action_t *required = NULL;
296 
297  did_change = TRUE;
298  key = pcmk__op_key(rsc->id, task, interval_ms);
299  crm_log_xml_info(digest_data->params_restart, "params:restart");
300  required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
301  pe_action_set_flag_reason(__func__, __LINE__, required, NULL,
302  "resource definition change", pe_action_optional, TRUE);
303 
304  trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);
305 
306  } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
307  // Changes that can potentially be handled by an agent reload
308  const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
309 
310  did_change = TRUE;
311  trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
312  crm_log_xml_info(digest_data->params_all, "params:reload");
313  key = pcmk__op_key(rsc->id, task, interval_ms);
314 
315  if (interval_ms > 0) {
316  pe_action_t *op = NULL;
317 
318 #if 0
319  /* Always reload/restart the entire resource */
320  ReloadRsc(rsc, active_node, data_set);
321 #else
322  /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
323             op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
324             pe__set_action_flags(op, pe_action_reschedule);
325 #endif
326 
327  } else if (digest_restart) {
328  pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
329 
330  /* Reload this resource */
331  ReloadRsc(rsc, active_node, data_set);
332  free(key);
333 
334  } else {
335  pe_action_t *required = NULL;
336  pe_rsc_trace(rsc, "Resource %s doesn't support agent reloads",
337  rsc->id);
338 
339  /* Re-send the start/demote/promote op
340  * Recurring ops will be detected independently
341  */
342  required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
343  pe_action_set_flag_reason(__func__, __LINE__, required, NULL,
344  "resource definition change", pe_action_optional, TRUE);
345  }
346  }
347 
348  return did_change;
349 }
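/* Informal summary of the digest outcomes handled above: a matching secure
 * digest (only "private" parameters changed) is merely reported; a
 * RSC_DIGEST_RESTART result reschedules the start/promote/demote action as
 * non-optional, forcing a restart; RSC_DIGEST_ALL or RSC_DIGEST_UNKNOWN
 * attempts an agent reload (or re-sends the recurring op), falling back to
 * re-sending the action when the agent does not support reloads.
 */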
350 
357 static void
358 check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
359  enum pe_check_parameters check, pe_working_set_t *data_set)
360 {
361  const char *reason = NULL;
362  op_digest_cache_t *digest_data = NULL;
363 
364  switch (check) {
365  case pe_check_active:
366  if (check_action_definition(rsc, node, rsc_op, data_set)
367  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
368  data_set)) {
369 
370  reason = "action definition changed";
371  }
372  break;
373 
374         case pe_check_last_failure:
375             digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
376  switch (digest_data->rc) {
377  case RSC_DIGEST_UNKNOWN:
378  crm_trace("Resource %s history entry %s on %s has no digest to compare",
379  rsc->id, ID(rsc_op), node->details->id);
380  break;
381  case RSC_DIGEST_MATCH:
382  break;
383  default:
384  reason = "resource parameters have changed";
385  break;
386  }
387  break;
388  }
389 
390  if (reason) {
391  pe__clear_failcount(rsc, node, reason, data_set);
392  }
393 }
394 
395 static void
396 check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
397 {
398  GList *gIter = NULL;
399  int offset = -1;
400  int stop_index = 0;
401  int start_index = 0;
402 
403  const char *task = NULL;
404 
405  xmlNode *rsc_op = NULL;
406  GList *op_list = NULL;
407  GList *sorted_op_list = NULL;
408 
409  CRM_CHECK(node != NULL, return);
410 
411  if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
412  pe_resource_t *parent = uber_parent(rsc);
413  if(parent == NULL
414  || pe_rsc_is_clone(parent) == FALSE
415  || pcmk_is_set(parent->flags, pe_rsc_unique)) {
416  pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
417  DeleteRsc(rsc, node, FALSE, data_set);
418  } else {
419  pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
420  }
421  return;
422 
423  } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
424  if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
425  DeleteRsc(rsc, node, FALSE, data_set);
426  }
427  pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
428  rsc->id, node->details->uname);
429  return;
430  }
431 
432  pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
433 
434  if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
435  DeleteRsc(rsc, node, FALSE, data_set);
436  }
437 
438  for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL;
439  rsc_op = pcmk__xe_next(rsc_op)) {
440 
441  if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) {
442  op_list = g_list_prepend(op_list, rsc_op);
443  }
444  }
445 
446  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
447  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
448 
449  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
450  xmlNode *rsc_op = (xmlNode *) gIter->data;
451  guint interval_ms = 0;
452 
453  offset++;
454 
455  if (start_index < stop_index) {
456  /* stopped */
457  continue;
458  } else if (offset < start_index) {
459  /* action occurred prior to a start */
460  continue;
461  }
462 
463  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
464  crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
465 
466         if ((interval_ms > 0) &&
467             (pcmk_is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
468             // Maintenance mode cancels recurring operations
469             CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
470 
471  } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, RSC_STATUS, RSC_START,
472  RSC_PROMOTE, RSC_MIGRATED, NULL)) {
473  /* If a resource operation failed, and the operation's definition
474  * has changed, clear any fail count so they can be retried fresh.
475  */
476 
477  if (pe__bundle_needs_remote_name(rsc, data_set)) {
478  /* We haven't allocated resources to nodes yet, so if the
479  * REMOTE_CONTAINER_HACK is used, we may calculate the digest
480  * based on the literal "#uname" value rather than the properly
481  * substituted value. That would mistakenly make the action
482  * definition appear to have been changed. Defer the check until
483  * later in this case.
484  */
485  pe__add_param_check(rsc_op, rsc, node, pe_check_active,
486  data_set);
487 
488  } else if (check_action_definition(rsc, node, rsc_op, data_set)
489  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
490  data_set)) {
491  pe__clear_failcount(rsc, node, "action definition changed",
492  data_set);
493  }
494  }
495  }
496  g_list_free(sorted_op_list);
497 }
498 
499 static GList *
500 find_rsc_list(GList *result, pe_resource_t * rsc, const char *id, gboolean renamed_clones,
501  gboolean partial, pe_working_set_t * data_set)
502 {
503  GList *gIter = NULL;
504  gboolean match = FALSE;
505 
506  if (id == NULL) {
507  return NULL;
508  }
509 
510  if (rsc == NULL) {
511  if (data_set == NULL) {
512  return NULL;
513  }
514  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
515  pe_resource_t *child = (pe_resource_t *) gIter->data;
516 
517  result = find_rsc_list(result, child, id, renamed_clones, partial,
518  NULL);
519  }
520  return result;
521  }
522 
523  if (partial) {
524  if (strstr(rsc->id, id)) {
525  match = TRUE;
526 
527  } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
528  match = TRUE;
529  }
530 
531  } else {
532  if (strcmp(rsc->id, id) == 0) {
533  match = TRUE;
534 
535  } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
536  match = TRUE;
537  }
538  }
539 
540  if (match) {
541  result = g_list_prepend(result, rsc);
542  }
543 
544  if (rsc->children) {
545  gIter = rsc->children;
546  for (; gIter != NULL; gIter = gIter->next) {
547  pe_resource_t *child = (pe_resource_t *) gIter->data;
548 
549  result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
550  }
551  }
552 
553  return result;
554 }
555 
556 static void
557 check_actions(pe_working_set_t * data_set)
558 {
559  const char *id = NULL;
560  pe_node_t *node = NULL;
561  xmlNode *lrm_rscs = NULL;
562  xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
563 
564  xmlNode *node_state = NULL;
565 
566  for (node_state = pcmk__xe_first_child(status); node_state != NULL;
567  node_state = pcmk__xe_next(node_state)) {
568 
569  if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE,
570  pcmk__str_none)) {
571  id = crm_element_value(node_state, XML_ATTR_ID);
572  lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
573  lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
574 
575  node = pe_find_node_id(data_set->nodes, id);
576 
577  if (node == NULL) {
578  continue;
579 
580  /* Still need to check actions for a maintenance node to cancel existing monitor operations */
581  } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
582  crm_trace("Skipping param check for %s: can't run resources",
583  node->details->uname);
584  continue;
585  }
586 
587  crm_trace("Processing node %s", node->details->uname);
588  if (node->details->online
589  || pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
590  xmlNode *rsc_entry = NULL;
591 
592  for (rsc_entry = pcmk__xe_first_child(lrm_rscs);
593  rsc_entry != NULL;
594  rsc_entry = pcmk__xe_next(rsc_entry)) {
595 
596  if (pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
597 
598  if (xml_has_children(rsc_entry)) {
599  GList *gIter = NULL;
600  GList *result = NULL;
601  const char *rsc_id = ID(rsc_entry);
602 
603  CRM_CHECK(rsc_id != NULL, return);
604 
605  result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
606  for (gIter = result; gIter != NULL; gIter = gIter->next) {
607  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
608 
609  if (rsc->variant != pe_native) {
610  continue;
611  }
612  check_actions_for(rsc_entry, rsc, node, data_set);
613  }
614  g_list_free(result);
615  }
616  }
617  }
618  }
619  }
620  }
621 }
622 
623 static void
624 apply_placement_constraints(pe_working_set_t * data_set)
625 {
626  for (GList *gIter = data_set->placement_constraints;
627  gIter != NULL; gIter = gIter->next) {
628  pe__location_t *cons = gIter->data;
629 
630  cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
631  }
632 }
633 
634 static gboolean
635 failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc)
636 {
637  gboolean rc = FALSE;
638  GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE);
639 
640  if (list) {
641  rc = TRUE;
642  }
643  g_list_free(list);
644  return rc;
645 }
646 
655 static void
656 check_migration_threshold(pe_resource_t *rsc, pe_node_t *node,
657  pe_working_set_t *data_set)
658 {
659  int fail_count, countdown;
660  pe_resource_t *failed;
661 
662  /* Migration threshold of 0 means never force away */
663  if (rsc->migration_threshold == 0) {
664  return;
665  }
666 
667     // If we're ignoring failures, also ignore the migration threshold
668     if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
669         return;
670  }
671 
672  /* If there are no failures, there's no need to force away */
673     fail_count = pe_get_failcount(node, rsc, NULL,
674                                   pe_fc_effective|pe_fc_fillers, NULL,
675                                   data_set);
676  if (fail_count <= 0) {
677  return;
678  }
679 
680  /* How many more times recovery will be tried on this node */
681  countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);
682 
683  /* If failed resource has a parent, we'll force the parent away */
684  failed = rsc;
685  if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
686  failed = uber_parent(rsc);
687  }
688 
689  if (countdown == 0) {
690  resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
691  crm_warn("Forcing %s away from %s after %d failures (max=%d)",
692  failed->id, node->details->uname, fail_count,
693  rsc->migration_threshold);
694  } else {
695  crm_info("%s can fail %d more times on %s before being forced off",
696  failed->id, countdown, node->details->uname);
697  }
698 }
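/* Worked example (assumed values, for illustration only): with
 * migration-threshold=3 and an effective fail count of 2, countdown is
 * QB_MAX(3 - 2, 0) = 1, so the resource may fail once more on this node;
 * after a third failure the countdown reaches 0 and the resource (or its
 * uber-parent, for clone instances) is banned from the node with a
 * -INFINITY location score.
 */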
699 
700 static void
701 common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
702 {
703  if (rsc->children) {
704  GList *gIter = rsc->children;
705 
706  for (; gIter != NULL; gIter = gIter->next) {
707  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
708 
709  common_apply_stickiness(child_rsc, node, data_set);
710  }
711  return;
712  }
713 
714  if (pcmk_is_set(rsc->flags, pe_rsc_managed)
715  && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) {
716  pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
717  pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
718 
719  if (current == NULL) {
720 
721         } else if ((match != NULL)
722                    || pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) {
723             pe_resource_t *sticky_rsc = rsc;
724 
725  resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
726  pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
727  " (node=%s, weight=%d)", sticky_rsc->id,
728  node->details->uname, rsc->stickiness);
729  } else {
730  GHashTableIter iter;
731  pe_node_t *nIter = NULL;
732 
733  pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
734  " and node %s is not explicitly allowed", rsc->id, node->details->uname);
735  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
736  while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
737  crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
738  }
739  }
740  }
741 
742  /* Check the migration threshold only if a failcount clear action
743  * has not already been placed for this resource on the node.
744  * There is no sense in potentially forcing the resource from this
745  * node if the failcount is being reset anyway.
746  *
747  * @TODO A clear_failcount operation can be scheduled in stage4() via
748  * check_actions_for(), or in stage5() via check_params(). This runs in
749  * stage2(), so it cannot detect those, meaning we might check the migration
750  * threshold when we shouldn't -- worst case, we stop or move the resource,
751  * then move it back next transition.
752  */
753  if (failcount_clear_action_exists(node, rsc) == FALSE) {
754  check_migration_threshold(rsc, node, data_set);
755  }
756 }
757 
758 void
759 complex_set_cmds(pe_resource_t * rsc)
760 {
761  GList *gIter = rsc->children;
762 
763  rsc->cmds = &resource_class_alloc_functions[rsc->variant];
764 
765  for (; gIter != NULL; gIter = gIter->next) {
766  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
767 
768  complex_set_cmds(child_rsc);
769  }
770 }
771 
772 void
773 set_alloc_actions(pe_working_set_t * data_set)
774 {
775 
776  GList *gIter = data_set->resources;
777 
778  for (; gIter != NULL; gIter = gIter->next) {
779  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
780 
781  complex_set_cmds(rsc);
782  }
783 }
784 
785 static void
786 calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
787 {
788  const char *key = (const char *)gKey;
789  const char *value = (const char *)gValue;
790  int *system_health = (int *)user_data;
791 
792  if (!gKey || !gValue || !user_data) {
793  return;
794  }
795 
796  if (pcmk__starts_with(key, "#health")) {
797  int score;
798 
799  /* Convert the value into an integer */
800  score = char2score(value);
801 
802  /* Add it to the running total */
803  *system_health = pe__add_scores(score, *system_health);
804  }
805 }
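/* Illustration (assumed attribute values): a node carrying the transient
 * attributes #health-smart="-10" and #health-net="green" contributes
 * char2score("-10") + char2score("green") to the running total, so with the
 * default green score of 0 the combined health is -10.  apply_system_health()
 * below turns that total into a location score for every resource on the
 * node.
 */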
806 
807 static gboolean
808 apply_system_health(pe_working_set_t * data_set)
809 {
810  GList *gIter = NULL;
811  const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
812  int base_health = 0;
813 
814  if (pcmk__str_eq(health_strategy, "none", pcmk__str_null_matches | pcmk__str_casei)) {
815  /* Prevent any accidental health -> score translation */
816  pcmk__score_red = 0;
817  pcmk__score_yellow = 0;
818  pcmk__score_green = 0;
819  return TRUE;
820 
821  } else if (pcmk__str_eq(health_strategy, "migrate-on-red", pcmk__str_casei)) {
822 
823  /* Resources on nodes which have health values of red are
824  * weighted away from that node.
825  */
826         pcmk__score_red = -INFINITY;
827         pcmk__score_yellow = 0;
828  pcmk__score_green = 0;
829 
830  } else if (pcmk__str_eq(health_strategy, "only-green", pcmk__str_casei)) {
831 
832  /* Resources on nodes which have health values of red or yellow
833  * are forced away from that node.
834  */
835         pcmk__score_red = -INFINITY;
836         pcmk__score_yellow = -INFINITY;
837         pcmk__score_green = 0;
838 
839  } else if (pcmk__str_eq(health_strategy, "progressive", pcmk__str_casei)) {
840  /* Same as the above, but use the r/y/g scores provided by the user
841  * Defaults are provided by the pe_prefs table
842  * Also, custom health "base score" can be used
843  */
844  base_health = char2score(pe_pref(data_set->config_hash,
845  "node-health-base"));
846 
847  } else if (pcmk__str_eq(health_strategy, "custom", pcmk__str_casei)) {
848 
849         /* Requires the admin to configure the rsc_location constraints for
850  * processing the stored health scores
851  */
852  /* TODO: Check for the existence of appropriate node health constraints */
853  return TRUE;
854 
855  } else {
856  crm_err("Unknown node health strategy: %s", health_strategy);
857  return FALSE;
858  }
859 
860  crm_info("Applying automated node health strategy: %s", health_strategy);
861 
862  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
863  int system_health = base_health;
864  pe_node_t *node = (pe_node_t *) gIter->data;
865 
866  /* Search through the node hash table for system health entries. */
867  g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
868 
869         crm_info(" Node %s has a combined system health of %d",
870  node->details->uname, system_health);
871 
872  /* If the health is non-zero, then create a new rsc2node so that the
873  * weight will be added later on.
874  */
875  if (system_health != 0) {
876 
877  GList *gIter2 = data_set->resources;
878 
879  for (; gIter2 != NULL; gIter2 = gIter2->next) {
880  pe_resource_t *rsc = (pe_resource_t *) gIter2->data;
881 
882  rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
883  }
884  }
885  }
886 
887  return TRUE;
888 }
889 
890 gboolean
891 stage0(pe_working_set_t * data_set)
892 {
893  xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
894 
895  if (data_set->input == NULL) {
896  return FALSE;
897  }
898 
899  if (!pcmk_is_set(data_set->flags, pe_flag_have_status)) {
900  crm_trace("Calculating status");
901  cluster_status(data_set);
902  }
903 
904  set_alloc_actions(data_set);
905  apply_system_health(data_set);
906  unpack_constraints(cib_constraints, data_set);
907 
908  return TRUE;
909 }
910 
911 /*
912  * Check nodes for resources started outside of the LRM
913  */
914 gboolean
915 probe_resources(pe_working_set_t * data_set)
916 {
917  pe_action_t *probe_node_complete = NULL;
918 
919  for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
920  pe_node_t *node = (pe_node_t *) gIter->data;
921  const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
922 
923  if (node->details->online == FALSE) {
924 
925  if (pe__is_remote_node(node) && node->details->remote_rsc
926  && (get_remote_node_state(node) == remote_state_failed)) {
927 
928  pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE);
929  }
930  continue;
931 
932  } else if (node->details->unclean) {
933  continue;
934 
935  } else if (node->details->rsc_discovery_enabled == FALSE) {
936  /* resource discovery is disabled for this node */
937  continue;
938  }
939 
940  if (probed != NULL && crm_is_true(probed) == FALSE) {
941  pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
942  CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
943 
944             add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
945             continue;
946  }
947 
948  for (GList *gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
949  pe_resource_t *rsc = (pe_resource_t *) gIter2->data;
950 
951  rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
952  }
953  }
954  return TRUE;
955 }
956 
957 static void
958 rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node)
959 {
960  GList *gIter = rsc->children;
961  pe_resource_t *top = uber_parent(rsc);
962  pe_node_t *match;
963 
964  if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
965  return;
966  }
967 
968  for (; gIter != NULL; gIter = gIter->next) {
969  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
970  rsc_discover_filter(child_rsc, node);
971  }
972 
973  match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
974  if (match && match->rsc_discover_mode != pe_discover_exclusive) {
975  match->weight = -INFINITY;
976  }
977 }
978 
979 static time_t
980 shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
981 {
982  const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
983  time_t result = 0;
984 
985  if (shutdown) {
986  long long result_ll;
987 
988  if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
989  result = (time_t) result_ll;
990  }
991  }
992  return result? result : get_effective_time(data_set);
993 }
994 
995 static void
996 apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
997 {
998  const char *class;
999 
1000  // Only primitives and (uncloned) groups may be locked
1001  if (rsc->variant == pe_group) {
1002  for (GList *item = rsc->children; item != NULL;
1003  item = item->next) {
1004  apply_shutdown_lock((pe_resource_t *) item->data, data_set);
1005  }
1006  } else if (rsc->variant != pe_native) {
1007  return;
1008  }
1009 
1010  // Fence devices and remote connections can't be locked
1011  class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
1012  if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
1013  || pe__resource_is_remote_conn(rsc, data_set)) {
1014  return;
1015  }
1016 
1017  if (rsc->lock_node != NULL) {
1018  // The lock was obtained from resource history
1019 
1020  if (rsc->running_on != NULL) {
1021  /* The resource was started elsewhere even though it is now
1022  * considered locked. This shouldn't be possible, but as a
1023  * failsafe, we don't want to disturb the resource now.
1024  */
1025  pe_rsc_info(rsc,
1026  "Cancelling shutdown lock because %s is already active",
1027  rsc->id);
1028  pe__clear_resource_history(rsc, rsc->lock_node, data_set);
1029  rsc->lock_node = NULL;
1030  rsc->lock_time = 0;
1031  }
1032 
1033  // Only a resource active on exactly one node can be locked
1034  } else if (pcmk__list_of_1(rsc->running_on)) {
1035  pe_node_t *node = rsc->running_on->data;
1036 
1037  if (node->details->shutdown) {
1038  if (node->details->unclean) {
1039  pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
1040  rsc->id, node->details->uname);
1041  } else {
1042  rsc->lock_node = node;
1043  rsc->lock_time = shutdown_time(node, data_set);
1044  }
1045  }
1046  }
1047 
1048  if (rsc->lock_node == NULL) {
1049  // No lock needed
1050  return;
1051  }
1052 
1053  if (data_set->shutdown_lock > 0) {
1054  time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock;
1055 
1056  pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
1057  rsc->id, rsc->lock_node->details->uname,
1058  (long long) lock_expiration);
1059  pe__update_recheck_time(++lock_expiration, data_set);
1060  } else {
1061  pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
1062  rsc->id, rsc->lock_node->details->uname);
1063  }
1064 
1065  // If resource is locked to one node, ban it from all other nodes
1066  for (GList *item = data_set->nodes; item != NULL; item = item->next) {
1067  pe_node_t *node = item->data;
1068 
1069         if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
1070             resource_location(rsc, node, -CRM_SCORE_INFINITY,
1071                               XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set);
1072  }
1073  }
1074 }
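/* Example (assumed cluster options): with shutdown-lock=true and
 * shutdown-lock-limit=10min, a resource that was active only on a node shut
 * down at time T remains locked to that node until roughly T + 600 seconds;
 * the cluster recheck time is pushed just past the expiry so the lock is
 * re-evaluated automatically, and all other nodes are banned by the
 * -INFINITY location scores created in the loop above.
 */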
1075 
1076 /*
1077  * \internal
1078  * \brief Stage 2 of cluster status: apply node-specific criteria
1079  *
1080  * Count known nodes, and apply location constraints, stickiness, and exclusive
1081  * resource discovery.
1082  */
1083 gboolean
1084 stage2(pe_working_set_t * data_set)
1085 {
1086  GList *gIter = NULL;
1087 
1088  if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
1089  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1090  apply_shutdown_lock((pe_resource_t *) gIter->data, data_set);
1091  }
1092  }
1093 
1094  if (!pcmk_is_set(data_set->flags, pe_flag_no_compat)) {
1095  // @COMPAT API backward compatibility
1096  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1097  pe_node_t *node = (pe_node_t *) gIter->data;
1098 
1099  if (node && (node->weight >= 0) && node->details->online
1100  && (node->details->type != node_ping)) {
1101  data_set->max_valid_nodes++;
1102  }
1103  }
1104  }
1105 
1106  apply_placement_constraints(data_set);
1107 
1108  gIter = data_set->nodes;
1109  for (; gIter != NULL; gIter = gIter->next) {
1110  GList *gIter2 = NULL;
1111  pe_node_t *node = (pe_node_t *) gIter->data;
1112 
1113  gIter2 = data_set->resources;
1114  for (; gIter2 != NULL; gIter2 = gIter2->next) {
1115  pe_resource_t *rsc = (pe_resource_t *) gIter2->data;
1116 
1117  common_apply_stickiness(rsc, node, data_set);
1118  rsc_discover_filter(rsc, node);
1119  }
1120  }
1121 
1122  return TRUE;
1123 }
1124 
1125 /*
1126  * Create internal resource constraints before allocation
1127  */
1128 gboolean
1129 stage3(pe_working_set_t * data_set)
1130 {
1131 
1132  GList *gIter = data_set->resources;
1133 
1134  for (; gIter != NULL; gIter = gIter->next) {
1135  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1136 
1137  rsc->cmds->internal_constraints(rsc, data_set);
1138  }
1139 
1140  return TRUE;
1141 }
1142 
1143 /*
1144  * Check for orphaned or redefined actions
1145  */
1146 gboolean
1147 stage4(pe_working_set_t * data_set)
1148 {
1149  check_actions(data_set);
1150  return TRUE;
1151 }
1152 
1153 static void *
1154 convert_const_pointer(const void *ptr)
1155 {
1156  /* Worst function ever */
1157  return (void *)ptr;
1158 }
1159 
1160 static gint
1161 sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
1162 {
1163  int rc = 0;
1164  int r1_weight = -INFINITY;
1165  int r2_weight = -INFINITY;
1166 
1167  const char *reason = "existence";
1168 
1169  GList *nodes = (GList *) data;
1170  const pe_resource_t *resource1 = a;
1171  const pe_resource_t *resource2 = b;
1172 
1173  pe_node_t *r1_node = NULL;
1174  pe_node_t *r2_node = NULL;
1175  GList *gIter = NULL;
1176  GHashTable *r1_nodes = NULL;
1177  GHashTable *r2_nodes = NULL;
1178 
1179  reason = "priority";
1180  r1_weight = resource1->priority;
1181  r2_weight = resource2->priority;
1182 
1183  if (r1_weight > r2_weight) {
1184  rc = -1;
1185  goto done;
1186  }
1187 
1188  if (r1_weight < r2_weight) {
1189  rc = 1;
1190  goto done;
1191  }
1192 
1193  reason = "no node list";
1194  if (nodes == NULL) {
1195  goto done;
1196  }
1197 
1198  r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
1199                                           resource1->id, NULL, NULL, 1,
1200                                           pe_weights_forward | pe_weights_init);
1201     pe__show_node_weights(true, NULL, resource1->id, r1_nodes,
1202  resource1->cluster);
1203 
1204  r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
1205                                           resource2->id, NULL, NULL, 1,
1206                                           pe_weights_forward | pe_weights_init);
1207     pe__show_node_weights(true, NULL, resource2->id, r2_nodes,
1208  resource2->cluster);
1209 
1210  /* Current location score */
1211  reason = "current location";
1212  r1_weight = -INFINITY;
1213  r2_weight = -INFINITY;
1214 
1215  if (resource1->running_on) {
1216  r1_node = pe__current_node(resource1);
1217  r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
1218  if (r1_node != NULL) {
1219  r1_weight = r1_node->weight;
1220  }
1221  }
1222  if (resource2->running_on) {
1223  r2_node = pe__current_node(resource2);
1224  r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
1225  if (r2_node != NULL) {
1226  r2_weight = r2_node->weight;
1227  }
1228  }
1229 
1230  if (r1_weight > r2_weight) {
1231  rc = -1;
1232  goto done;
1233  }
1234 
1235  if (r1_weight < r2_weight) {
1236  rc = 1;
1237  goto done;
1238  }
1239 
1240  reason = "score";
1241  for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
1242  pe_node_t *node = (pe_node_t *) gIter->data;
1243 
1244  r1_node = NULL;
1245  r2_node = NULL;
1246 
1247  r1_weight = -INFINITY;
1248  if (r1_nodes) {
1249  r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
1250  }
1251  if (r1_node) {
1252  r1_weight = r1_node->weight;
1253  }
1254 
1255  r2_weight = -INFINITY;
1256  if (r2_nodes) {
1257  r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
1258  }
1259  if (r2_node) {
1260  r2_weight = r2_node->weight;
1261  }
1262 
1263  if (r1_weight > r2_weight) {
1264  rc = -1;
1265  goto done;
1266  }
1267 
1268  if (r1_weight < r2_weight) {
1269  rc = 1;
1270  goto done;
1271  }
1272  }
1273 
1274  done:
1275  crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
1276  resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
1277  rc < 0 ? '>' : rc > 0 ? '<' : '=',
1278  resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
1279 
1280  if (r1_nodes) {
1281  g_hash_table_destroy(r1_nodes);
1282  }
1283  if (r2_nodes) {
1284  g_hash_table_destroy(r2_nodes);
1285  }
1286 
1287  return rc;
1288 }
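/* Usage sketch (mirrors the real call site in stage5() below): sorting the
 * resource list so that higher-priority, better-placed resources are
 * allocated first.
 */
#if 0
data_set->resources = g_list_sort_with_data(data_set->resources,
                                            sort_rsc_process_order, nodes);
#endif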
1289 
1290 static void
1291 allocate_resources(pe_working_set_t * data_set)
1292 {
1293  GList *gIter = NULL;
1294 
1295  if (pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
1296  /* Allocate remote connection resources first (which will also allocate
1297  * any colocation dependencies). If the connection is migrating, always
1298  * prefer the partial migration target.
1299  */
1300  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1301  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1302  if (rsc->is_remote_node == FALSE) {
1303  continue;
1304  }
1305  pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
1306  rsc->id);
1307  rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
1308  }
1309  }
1310 
1311  /* now do the rest of the resources */
1312  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1313  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1314  if (rsc->is_remote_node == TRUE) {
1315  continue;
1316  }
1317  pe_rsc_trace(rsc, "Allocating %s resource '%s'",
1318  crm_element_name(rsc->xml), rsc->id);
1319  rsc->cmds->allocate(rsc, NULL, data_set);
1320  }
1321 }
1322 
1323 /* We always use pe_order_preserve with these convenience functions to exempt
1324  * internally generated constraints from the prohibition of user constraints
1325  * involving remote connection resources.
1326  *
1327  * The start ordering additionally uses pe_order_runnable_left so that the
1328  * specified action is not runnable if the start is not runnable.
1329  */
1330 
1331 static inline void
1332 order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action,
1333  enum pe_ordering extra, pe_working_set_t *data_set)
1334 {
1335  if (lh_rsc && rh_action && data_set) {
1336  custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
1337  rh_action->rsc, NULL, rh_action,
1338  pe_order_preserve | pe_order_runnable_left | extra,
1339  data_set);
1340  }
1341 }
1342 
1343 static inline void
1344 order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc,
1345  enum pe_ordering extra, pe_working_set_t *data_set)
1346 {
1347  if (lh_action && rh_rsc && data_set) {
1348  custom_action_order(lh_action->rsc, NULL, lh_action,
1349  rh_rsc, stop_key(rh_rsc), NULL,
1350  pe_order_preserve | extra, data_set);
1351  }
1352 }
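/* Usage sketch (hypothetical resources and action, not part of this file):
 * ordering an existing action before a resource's stop, and a resource's
 * start before an existing action.  Both helpers add pe_order_preserve
 * themselves; only the extra flags are supplied by the caller.
 */
#if 0
order_start_then_action(rsc_a, some_action, pe_order_optional, data_set);
order_action_then_stop(some_action, rsc_b, pe_order_optional, data_set);
#endif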
1353 
1354 // Clear fail counts for orphaned rsc on all online nodes
1355 static void
1356 cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set)
1357 {
1358  GList *gIter = NULL;
1359 
1360  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1361  pe_node_t *node = (pe_node_t *) gIter->data;
1362 
1363  if (node->details->online
1364  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
1365  data_set)) {
1366 
1367  pe_action_t *clear_op = NULL;
1368 
1369  clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
1370  data_set);
1371 
1372  /* We can't use order_action_then_stop() here because its
1373  * pe_order_preserve breaks things
1374  */
1375  custom_action_order(clear_op->rsc, NULL, clear_op,
1376  rsc, stop_key(rsc), NULL,
1377  pe_order_optional, data_set);
1378  }
1379  }
1380 }
1381 
1382 gboolean
1383 stage5(pe_working_set_t * data_set)
1384 {
1385  pcmk__output_t *out = data_set->priv;
1386  GList *gIter = NULL;
1387 
1388  if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) {
1389  GList *nodes = g_list_copy(data_set->nodes);
1390 
1391  nodes = sort_nodes_by_weight(nodes, NULL, data_set);
1392  data_set->resources =
1393  g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
1394 
1395  g_list_free(nodes);
1396  }
1397 
1398  gIter = data_set->nodes;
1399  for (; gIter != NULL; gIter = gIter->next) {
1400  pe_node_t *node = (pe_node_t *) gIter->data;
1401 
1402  if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
1403  out->message(out, "node-capacity", node, "Original");
1404  }
1405  }
1406 
1407  crm_trace("Allocating services");
1408  /* Take (next) highest resource, assign it and create its actions */
1409 
1410  allocate_resources(data_set);
1411 
1412  gIter = data_set->nodes;
1413  for (; gIter != NULL; gIter = gIter->next) {
1414  pe_node_t *node = (pe_node_t *) gIter->data;
1415 
1416  if (pcmk_is_set(data_set->flags, pe_flag_show_utilization)) {
1417  out->message(out, "node-capacity", node, "Remaining");
1418  }
1419  }
1420 
1421  // Process deferred action checks
1422  pe__foreach_param_check(data_set, check_params);
1423  pe__free_param_checks(data_set);
1424 
1425  if (pcmk_is_set(data_set->flags, pe_flag_startup_probes)) {
1426  crm_trace("Calculating needed probes");
1427  /* This code probably needs optimization
1428  * ptest -x with 100 nodes, 100 clones and clone-max=100:
1429 
1430  With probes:
1431 
1432  ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
1433  ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
1434  ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
1435  ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
1436  ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
1437  ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
1438  ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
1439  ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
1440  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
1441  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
1442  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
1443  36s
1444  ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
1445 
1446  Without probes:
1447 
1448  ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
1449  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
1450  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
1451  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
1452  ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
1453  ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
1454  ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
1455  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
1456  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
1457  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
1458  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
1459  */
1460 
1461  probe_resources(data_set);
1462  }
1463 
1464  crm_trace("Handle orphans");
1465  if (pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
1466  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1467  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1468 
1469  /* There's no need to recurse into rsc->children because those
1470  * should just be unallocated clone instances.
1471  */
1472  if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
1473  cleanup_orphans(rsc, data_set);
1474  }
1475  }
1476  }
1477 
1478  crm_trace("Creating actions");
1479 
1480  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1481  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1482 
1483  rsc->cmds->create_actions(rsc, data_set);
1484  }
1485 
1486  crm_trace("Creating done");
1487  return TRUE;
1488 }
1489 
1490 static gboolean
1491 is_managed(const pe_resource_t * rsc)
1492 {
1493  GList *gIter = rsc->children;
1494 
1495  if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
1496  return TRUE;
1497  }
1498 
1499  for (; gIter != NULL; gIter = gIter->next) {
1500  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
1501 
1502  if (is_managed(child_rsc)) {
1503  return TRUE;
1504  }
1505  }
1506 
1507  return FALSE;
1508 }
1509 
1510 static gboolean
1511 any_managed_resources(pe_working_set_t * data_set)
1512 {
1513 
1514  GList *gIter = data_set->resources;
1515 
1516  for (; gIter != NULL; gIter = gIter->next) {
1517  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1518 
1519  if (is_managed(rsc)) {
1520  return TRUE;
1521  }
1522  }
1523  return FALSE;
1524 }
1525 
1533 static void
1534 fence_guest(pe_node_t *node, pe_working_set_t *data_set)
1535 {
1536  pe_resource_t *container = node->details->remote_rsc->container;
1537  pe_action_t *stop = NULL;
1538  pe_action_t *stonith_op = NULL;
1539 
1540  /* The fence action is just a label; we don't do anything differently for
1541  * off vs. reboot. We specify it explicitly, rather than let it default to
1542  * cluster's default action, because we are not _initiating_ fencing -- we
1543  * are creating a pseudo-event to describe fencing that is already occurring
1544  * by other means (container recovery).
1545  */
1546  const char *fence_action = "off";
1547 
1548  /* Check whether guest's container resource has any explicit stop or
1549  * start (the stop may be implied by fencing of the guest's host).
1550  */
1551  if (container) {
1552  stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
1553 
1554  if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
1555  fence_action = "reboot";
1556  }
1557  }
1558 
1559  /* Create a fence pseudo-event, so we have an event to order actions
1560  * against, and the controller can always detect it.
1561  */
1562     stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set);
1563     update_action_flags(stonith_op, pe_action_pseudo|pe_action_runnable,
1564                         __func__, __LINE__);
1565 
1566  /* We want to imply stops/demotes after the guest is stopped, not wait until
1567  * it is restarted, so we always order pseudo-fencing after stop, not start
1568  * (even though start might be closer to what is done for a real reboot).
1569  */
1570  if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
1571  pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set);
1572  crm_info("Implying guest node %s is down (action %d) after %s fencing",
1573  node->details->uname, stonith_op->id, stop->node->details->uname);
1574         order_actions(parent_stonith_op, stonith_op,
1575                       pe_order_runnable_left|pe_order_implies_then);
1576 
1577  } else if (stop) {
1578         order_actions(stop, stonith_op,
1579                       pe_order_runnable_left|pe_order_implies_then);
1580         crm_info("Implying guest node %s is down (action %d) "
1581  "after container %s is stopped (action %d)",
1582  node->details->uname, stonith_op->id,
1583  container->id, stop->id);
1584  } else {
1585  /* If we're fencing the guest node but there's no stop for the guest
1586  * resource, we must think the guest is already stopped. However, we may
1587  * think so because its resource history was just cleaned. To avoid
1588  * unnecessarily considering the guest node down if it's really up,
1589  * order the pseudo-fencing after any stop of the connection resource,
1590  * which will be ordered after any container (re-)probe.
1591  */
1592  stop = find_first_action(node->details->remote_rsc->actions, NULL,
1593  RSC_STOP, NULL);
1594 
1595  if (stop) {
1596  order_actions(stop, stonith_op, pe_order_optional);
1597  crm_info("Implying guest node %s is down (action %d) "
1598  "after connection is stopped (action %d)",
1599  node->details->uname, stonith_op->id, stop->id);
1600  } else {
1601  /* Not sure why we're fencing, but everything must already be
1602  * cleanly stopped.
1603  */
1604  crm_info("Implying guest node %s is down (action %d) ",
1605  node->details->uname, stonith_op->id);
1606  }
1607  }
1608 
1609  /* Order/imply other actions relative to pseudo-fence as with real fence */
1610  pcmk__order_vs_fence(stonith_op, data_set);
1611 }
1612 
1613 /*
1614  * Create dependencies for stonith and shutdown operations
1615  */
1616 gboolean
1617 stage6(pe_working_set_t * data_set)
1618 {
1619  pe_action_t *dc_down = NULL;
1620  pe_action_t *stonith_op = NULL;
1621  gboolean integrity_lost = FALSE;
1622  gboolean need_stonith = TRUE;
1623  GList *gIter;
1624  GList *stonith_ops = NULL;
1625  GList *shutdown_ops = NULL;
1626 
1627  /* Remote ordering constraints need to happen prior to calculating fencing
1628  * because it is one more place we will mark the node as dirty.
1629  *
1630  * A nice side effect of doing them early is that apply_*_ordering() can be
1631  * simpler because pe_fence_node() has already done some of the work.
1632  */
1633  crm_trace("Creating remote ordering constraints");
1634  apply_remote_node_ordering(data_set);
1635 
1636  crm_trace("Processing fencing and shutdown cases");
1637  if (any_managed_resources(data_set) == FALSE) {
1638  crm_notice("Delaying fencing operations until there are resources to manage");
1639  need_stonith = FALSE;
1640  }
1641 
1642  /* Check each node for stonith/shutdown */
1643  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1644  pe_node_t *node = (pe_node_t *) gIter->data;
1645 
1646  /* Guest nodes are "fenced" by recovering their container resource,
1647  * so handle them separately.
1648  */
1649  if (pe__is_guest_node(node)) {
1650  if (node->details->remote_requires_reset && need_stonith
1651  && pe_can_fence(data_set, node)) {
1652  fence_guest(node, data_set);
1653  }
1654  continue;
1655  }
1656 
1657  stonith_op = NULL;
1658 
1659  if (node->details->unclean
1660  && need_stonith && pe_can_fence(data_set, node)) {
1661 
1662  stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set);
1663  pe_warn("Scheduling Node %s for STONITH", node->details->uname);
1664 
1665  pcmk__order_vs_fence(stonith_op, data_set);
1666 
1667  if (node->details->is_dc) {
1668  // Remember if the DC is being fenced
1669  dc_down = stonith_op;
1670 
1671  } else {
1672 
1673                 if (!pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)
1674                     && (stonith_ops != NULL)) {
1675  /* Concurrent fencing is disabled, so order each non-DC
1676  * fencing in a chain. If there is any DC fencing or
1677  * shutdown, it will be ordered after the last action in the
1678  * chain later.
1679  */
1680  order_actions((pe_action_t *) stonith_ops->data,
1681  stonith_op, pe_order_optional);
1682  }
1683 
1684  // Remember all non-DC fencing actions in a separate list
1685  stonith_ops = g_list_prepend(stonith_ops, stonith_op);
1686  }
1687 
1688  } else if (node->details->online && node->details->shutdown &&
1689  /* TODO define what a shutdown op means for a remote node.
1690  * For now we do not send shutdown operations for remote nodes, but
1691  * if we can come up with a good use for this in the future, we will. */
1692  pe__is_guest_or_remote_node(node) == FALSE) {
1693 
1694  pe_action_t *down_op = sched_shutdown_op(node, data_set);
1695 
1696  if (node->details->is_dc) {
1697  // Remember if the DC is being shut down
1698  dc_down = down_op;
1699  } else {
1700  // Remember non-DC shutdowns for later ordering
1701  shutdown_ops = g_list_prepend(shutdown_ops, down_op);
1702  }
1703  }
1704 
1705  if (node->details->unclean && stonith_op == NULL) {
1706  integrity_lost = TRUE;
1707  pe_warn("Node %s is unclean!", node->details->uname);
1708  }
1709  }
1710 
1711  if (integrity_lost) {
1712  if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
1713  pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
1714  pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
1715 
1716  } else if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
1717  crm_notice("Cannot fence unclean nodes until quorum is"
1718  " attained (or no-quorum-policy is set to ignore)");
1719  }
1720  }
1721 
1722  if (dc_down != NULL) {
1723  /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
1724  * DC elections. However, we don't want to order non-DC shutdowns before
1725  * a DC *fencing*, because even though we don't want a node that's
1726  * shutting down to become DC, the DC fencing could be ordered before a
1727  * clone stop that's also ordered before the shutdowns, thus leading to
1728  * a graph loop.
1729  */
1730  if (pcmk__str_eq(dc_down->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
1731  for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
1732  pe_action_t *node_stop = (pe_action_t *) gIter->data;
1733 
1734  crm_debug("Ordering shutdown on %s before %s on DC %s",
1735  node_stop->node->details->uname,
1736  dc_down->task, dc_down->node->details->uname);
1737 
1738  order_actions(node_stop, dc_down, pe_order_optional);
1739  }
1740  }
1741 
1742  // Order any non-DC fencing before any DC fencing or shutdown
1743 
1744  if (pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)) {
1745  /* With concurrent fencing, order each non-DC fencing action
1746  * separately before any DC fencing or shutdown.
1747  */
1748  for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
1749                 order_actions((pe_action_t *) gIter->data, dc_down,
1750                               pe_order_optional);
1751             }
1752  } else if (stonith_ops) {
1753  /* Without concurrent fencing, the non-DC fencing actions are
1754  * already ordered relative to each other, so we just need to order
1755  * the DC fencing after the last action in the chain (which is the
1756  * first item in the list).
1757  */
1758             order_actions((pe_action_t *) stonith_ops->data, dc_down,
1759                           pe_order_optional);
1760         }
1761  }
1762  g_list_free(stonith_ops);
1763  g_list_free(shutdown_ops);
1764  return TRUE;
1765 }
1766 
1767 /*
1768  * Determine the sets of independent actions and the correct order for the
1769  * actions in each set.
1770  *
1771  * Mark dependencies of un-runnable actions un-runnable
1772  *
1773  */
1774 static GList *
1775 find_actions_by_task(GList *actions, pe_resource_t * rsc, const char *original_key)
1776 {
1777  GList *list = NULL;
1778 
1779  list = find_actions(actions, original_key, NULL);
1780  if (list == NULL) {
1781  /* we're potentially searching a child of the original resource */
1782  char *key = NULL;
1783  char *task = NULL;
1784  guint interval_ms = 0;
1785 
1786  if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
1787  key = pcmk__op_key(rsc->id, task, interval_ms);
1788  list = find_actions(actions, key, NULL);
1789 
1790  } else {
1791  crm_err("search key: %s", original_key);
1792  }
1793 
1794  free(key);
1795  free(task);
1796  }
1797 
1798  return list;
1799 }
1800 
1801 static void
1802 rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
1803  pe__ordering_t *order)
1804 {
1805  GList *gIter = NULL;
1806  GList *rh_actions = NULL;
1807  pe_action_t *rh_action = NULL;
1808  enum pe_ordering type;
1809 
1810  CRM_CHECK(rsc != NULL, return);
1811  CRM_CHECK(order != NULL, return);
1812 
1813  type = order->type;
1814  rh_action = order->rh_action;
1815  crm_trace("Processing RH of ordering constraint %d", order->id);
1816 
1817  if (rh_action != NULL) {
1818  rh_actions = g_list_prepend(NULL, rh_action);
1819 
1820  } else if (rsc != NULL) {
1821  rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
1822  }
1823 
1824  if (rh_actions == NULL) {
1825  pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
1826  " ignoring", rsc->id, order->rh_action_task);
1827  if (lh_action) {
1828  pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
1829  }
1830  return;
1831  }
1832 
1833  if ((lh_action != NULL) && (lh_action->rsc == rsc)
1834  && pcmk_is_set(lh_action->flags, pe_action_dangle)) {
1835 
1836  pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
1837                      order->rh_action_task);
1838         pe__clear_order_flags(type, pe_order_implies_then);
1839     }
1840 
1841  gIter = rh_actions;
1842  for (; gIter != NULL; gIter = gIter->next) {
1843  pe_action_t *rh_action_iter = (pe_action_t *) gIter->data;
1844 
1845  if (lh_action) {
1846  order_actions(lh_action, rh_action_iter, type);
1847 
1848  } else if (type & pe_order_implies_then) {
1849  update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear,
1850  __func__, __LINE__);
1851  crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
1852  } else {
1853  crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
1854  }
1855  }
1856 
1857  g_list_free(rh_actions);
1858 }
1859 
1860 static void
1861 rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
1862  pe_working_set_t *data_set)
1863 {
1864  GList *gIter = NULL;
1865  GList *lh_actions = NULL;
1866  pe_action_t *lh_action = order->lh_action;
1867  pe_resource_t *rh_rsc = order->rh_rsc;
1868 
1869  crm_trace("Processing LH of ordering constraint %d", order->id);
1870  CRM_ASSERT(lh_rsc != NULL);
1871 
1872  if (lh_action != NULL) {
1873  lh_actions = g_list_prepend(NULL, lh_action);
1874 
1875  } else {
1876  lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
1877  }
1878 
1879  if (lh_actions == NULL && lh_rsc != rh_rsc) {
1880  char *key = NULL;
1881  char *op_type = NULL;
1882  guint interval_ms = 0;
1883 
1884  parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms);
1885  key = pcmk__op_key(lh_rsc->id, op_type, interval_ms);
1886 
1887  if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) {
1888  free(key);
1889  pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
1890  lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1891 
1892  } else if ((lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_UNPROMOTED)
1893  && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) {
1894  free(key);
1895  pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
1896  lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1897 
1898  } else {
1899  pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
1900  lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
1901  lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
1902  lh_actions = g_list_prepend(NULL, lh_action);
1903  }
1904 
1905  free(op_type);
1906  }
1907 
1908  gIter = lh_actions;
1909  for (; gIter != NULL; gIter = gIter->next) {
1910  pe_action_t *lh_action_iter = (pe_action_t *) gIter->data;
1911 
1912  if (rh_rsc == NULL && order->rh_action) {
1913  rh_rsc = order->rh_action->rsc;
1914  }
1915  if (rh_rsc) {
1916  rsc_order_then(lh_action_iter, rh_rsc, order);
1917 
1918  } else if (order->rh_action) {
1919  order_actions(lh_action_iter, order->rh_action, order->type);
1920  }
1921  }
1922 
1923  g_list_free(lh_actions);
1924 }
1925 
1927  pe_working_set_t *data_set);
1928 
1929 static int
1930 is_recurring_action(pe_action_t *action)
1931 {
1932  guint interval_ms;
1933 
1934  if (pcmk__guint_from_hash(action->meta,
1935  XML_LRM_ATTR_INTERVAL_MS, 0,
1936  &interval_ms) != pcmk_rc_ok) {
1937  return 0;
1938  }
1939  return (interval_ms > 0);
1940 }
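/* Editorial note (not part of the original source): the helper above treats
 * an action as recurring when its XML_LRM_ATTR_INTERVAL_MS meta-attribute
 * parses to a value greater than zero (e.g. a monitor with interval
 * "10000"); probes and other one-shot operations have an interval of 0.
 */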
1941 
1942 static void
1943 apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set)
1944 {
1945  /* VMs are also classified as containers for these purposes... in
1946  * that they both involve a 'thing' running on a real or remote
1947  * cluster node.
1948  *
1949  * This allows us to be smarter about the type and extent of
1950  * recovery actions required in various scenarios
1951  */
1952  pe_resource_t *remote_rsc = NULL;
1953  pe_resource_t *container = NULL;
1954  enum action_tasks task = text2task(action->task);
1955 
1956  CRM_ASSERT(action->rsc);
1957  CRM_ASSERT(action->node);
1959 
1960  remote_rsc = action->node->details->remote_rsc;
1961  CRM_ASSERT(remote_rsc);
1962 
1963  container = remote_rsc->container;
1964  CRM_ASSERT(container);
1965 
1966  if (pcmk_is_set(container->flags, pe_rsc_failed)) {
1967  pe_fence_node(data_set, action->node, "container failed", FALSE);
1968  }
1969 
1970  crm_trace("Order %s action %s relative to %s%s for %s%s",
1971  action->task, action->uuid,
1972  pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
1973  remote_rsc->id,
1974  pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "",
1975  container->id);
1976 
1977  if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
1978  /* Migration ops map to "no_action", but we need to apply the same
1979  * ordering as for stop or demote (see get_router_node()).
1980  */
1981  task = stop_rsc;
1982  }
1983 
1984  switch (task) {
1985  case start_rsc:
1986  case action_promote:
1987  /* Force resource recovery if the container is recovered */
1988  order_start_then_action(container, action, pe_order_implies_then,
1989  data_set);
1990 
1991  /* Wait for the connection resource to be up too */
1992  order_start_then_action(remote_rsc, action, pe_order_none,
1993  data_set);
1994  break;
1995 
1996  case stop_rsc:
1997  case action_demote:
1998  if (pcmk_is_set(container->flags, pe_rsc_failed)) {
1999  /* When the container representing a guest node fails, any stop
2000  * or demote actions for resources running on the guest node
2001  * are implied by the container stopping. This is similar to
2002  * how fencing operations work for cluster nodes and remote
2003  * nodes.
2004  */
2005  } else {
2006  /* Ensure the operation happens before the connection is brought
2007  * down.
2008  *
2009  * If we really wanted to, we could order these after the
2010  * connection start, IFF the container's current role was
2011  * stopped (otherwise we re-introduce an ordering loop when the
2012  * connection is restarting).
2013  */
2014  order_action_then_stop(action, remote_rsc, pe_order_none,
2015  data_set);
2016  }
2017  break;
2018 
2019  default:
2020  /* Wait for the connection resource to be up */
2021  if (is_recurring_action(action)) {
2022  /* In case we ever get the recovery logic wrong, force
2023  * recurring monitors to be restarted, even if just
2024  * the connection was re-established
2025  */
2026  if(task != no_action) {
2027  order_start_then_action(remote_rsc, action,
2028  pe_order_implies_then, data_set);
2029  }
2030  } else {
2031  order_start_then_action(remote_rsc, action, pe_order_none,
2032  data_set);
2033  }
2034  break;
2035  }
2036 }
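/* Editorial note (not part of the original source), roughly summarizing the
 * orderings applied above for an action on a guest node:
 *   - start/promote: ordered after the container start (forcing recovery of
 *     the resource if the container is recovered) and after the connection
 *     start
 *   - stop/demote: ordered before the connection stop, unless the container
 *     has failed, in which case the stops are implied by the container stop
 *   - anything else: ordered after the connection start, with recurring
 *     monitors forced to restart when the connection is recovered
 * A failed container additionally causes the guest node to be fenced.
 */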
2037 
2038 static enum remote_connection_state
2039 get_remote_node_state(pe_node_t *node)
2040 {
2041  pe_resource_t *remote_rsc = NULL;
2042  pe_node_t *cluster_node = NULL;
2043 
2044  CRM_ASSERT(node);
2045 
2046  remote_rsc = node->details->remote_rsc;
2047  CRM_ASSERT(remote_rsc);
2048 
2049  cluster_node = pe__current_node(remote_rsc);
2050 
2051  /* If the cluster node the remote connection resource resides on
2052  * is unclean or went offline, we can't process any operations
2053  * on that remote node until after it starts elsewhere.
2054  */
2055  if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
2056  /* The connection resource is not going to run anywhere */
2057 
2058  if (cluster_node && cluster_node->details->unclean) {
2059  /* The remote connection is failed because its resource is on a
2060  * failed node and can't be recovered elsewhere, so we must fence.
2061  */
2062  return remote_state_failed;
2063  }
2064 
2065  if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) {
2066  /* Connection resource is cleanly stopped */
2067  return remote_state_stopped;
2068  }
2069 
2070  /* Connection resource is failed */
2071 
2072  if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
2073  && remote_rsc->remote_reconnect_ms
2074  && node->details->remote_was_fenced
2075  && !pe__shutdown_requested(node)) {
2076 
2077  /* We won't know whether the connection is recoverable until the
2078  * reconnect interval expires and we reattempt connection.
2079  */
2080  return remote_state_unknown;
2081  }
2082 
2083  /* The remote connection is in a failed state. If there are any
2084  * resources known to be active on it (stop) or in an unknown state
2085  * (probe), we must assume the worst and fence it.
2086  */
2087  return remote_state_failed;
2088 
2089  } else if (cluster_node == NULL) {
2090  /* Connection is recoverable but not currently running anywhere, see if we can recover it first */
2091  return remote_state_unknown;
2092 
2093  } else if(cluster_node->details->unclean == TRUE
2094  || cluster_node->details->online == FALSE) {
2095  /* Connection is running on a dead node, see if we can recover it first */
2096  return remote_state_resting;
2097 
2098  } else if (pcmk__list_of_multiple(remote_rsc->running_on)
2099  && remote_rsc->partial_migration_source
2100  && remote_rsc->partial_migration_target) {
2101  /* We're in the middle of migrating a connection resource,
2102  * wait until after the resource migrates before performing
2103  * any actions.
2104  */
2105  return remote_state_resting;
2106 
2107  }
2108  return remote_state_alive;
2109 }
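/* Editorial note (not part of the original source): the states returned
 * above roughly mean:
 *   stopped - the connection is cleanly stopped and not coming back up
 *   failed  - the connection cannot be recovered, so fencing may be needed
 *   unknown - recoverability is not yet known (e.g. the reconnect interval
 *             has not expired, or the connection may be recoverable elsewhere)
 *   resting - the connection must first be recovered or finish migrating
 *             elsewhere before actions can proceed
 *   alive   - the connection is up on a healthy cluster node
 */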
2110 
2115 static void
2116 apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set)
2117 {
2118  pe_resource_t *remote_rsc = NULL;
2119  enum action_tasks task = text2task(action->task);
2120  enum remote_connection_state state = get_remote_node_state(action->node);
2121 
2122  enum pe_ordering order_opts = pe_order_none;
2123 
2124  if (action->rsc == NULL) {
2125  return;
2126  }
2127 
2128  CRM_ASSERT(action->node);
2130 
2131  remote_rsc = action->node->details->remote_rsc;
2132  CRM_ASSERT(remote_rsc);
2133 
2134  crm_trace("Order %s action %s relative to %s%s (state: %s)",
2135  action->task, action->uuid,
2136  pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
2137  remote_rsc->id, state2text(state));
2138 
2139  if (pcmk__strcase_any_of(action->task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
2140  /* Migration ops map to "no_action", but we need to apply the same
2141  * ordering as for stop or demote (see get_router_node()).
2142  */
2143  task = stop_rsc;
2144  }
2145 
2146  switch (task) {
2147  case start_rsc:
2148  case action_promote:
2149  order_opts = pe_order_none;
2150 
2151  if (state == remote_state_failed) {
2152  /* Force recovery, by making this action required */
2153  pe__set_order_flags(order_opts, pe_order_implies_then);
2154  }
2155 
2156  /* Ensure connection is up before running this action */
2157  order_start_then_action(remote_rsc, action, order_opts, data_set);
2158  break;
2159 
2160  case stop_rsc:
2161  if(state == remote_state_alive) {
2162  order_action_then_stop(action, remote_rsc,
2163  pe_order_implies_first, data_set);
2164 
2165  } else if(state == remote_state_failed) {
2166  /* The resource is active on the node, but since we don't have a
2167  * valid connection, the only way to stop the resource is by
2168  * fencing the node. There is no need to order the stop relative
2169  * to the remote connection, since the stop will become implied
2170  * by the fencing.
2171  */
2172  pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE);
2173 
2174  } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
2175  /* State must be remote_state_unknown or remote_state_stopped.
2176  * Since the connection is not coming back up in this
2177  * transition, stop this resource first.
2178  */
2179  order_action_then_stop(action, remote_rsc,
2180  pe_order_implies_first, data_set);
2181 
2182  } else {
2183  /* The connection is going to be started somewhere else, so
2184  * stop this resource after that completes.
2185  */
2186  order_start_then_action(remote_rsc, action, pe_order_none, data_set);
2187  }
2188  break;
2189 
2190  case action_demote:
2191  /* Only order this demote relative to the connection start if the
2192  * connection isn't being torn down. Otherwise, the demote would be
2193  * blocked because the connection start would not be allowed.
2194  */
2195  if(state == remote_state_resting || state == remote_state_unknown) {
2196  order_start_then_action(remote_rsc, action, pe_order_none,
2197  data_set);
2198  } /* Otherwise we can rely on the stop ordering */
2199  break;
2200 
2201  default:
2202  /* Wait for the connection resource to be up */
2203  if (is_recurring_action(action)) {
2204  /* In case we ever get the recovery logic wrong, force
2205  * recurring monitors to be restarted, even if just
2206  * the connection was re-established
2207  */
2208  order_start_then_action(remote_rsc, action,
2209  pe_order_implies_then, data_set);
2210 
2211  } else {
2212  pe_node_t *cluster_node = pe__current_node(remote_rsc);
2213 
2214  if(task == monitor_rsc && state == remote_state_failed) {
2215  /* We would only be here if we do not know the
2216  * state of the resource on the remote node.
2217  * Since we have no way to find out, it is
2218  * necessary to fence the node.
2219  */
2220  pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE);
2221  }
2222 
2223  if(cluster_node && state == remote_state_stopped) {
2224  /* The connection is currently up, but is going
2225  * down permanently.
2226  *
2227  * Make sure we check services are actually
2228  * stopped _before_ we let the connection get
2229  * closed
2230  */
2231  order_action_then_stop(action, remote_rsc,
2232  pe_order_runnable_left, data_set);
2233 
2234  } else {
2235  order_start_then_action(remote_rsc, action, pe_order_none,
2236  data_set);
2237  }
2238  }
2239  break;
2240  }
2241 }
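/* Editorial note (not part of the original source), roughly summarizing the
 * orderings applied above for an action on a remote node:
 *   - start/promote: ordered after the connection start, and made mandatory
 *     when the connection has failed, forcing recovery
 *   - stop: ordered before the connection stop when the connection is alive
 *     or permanently going away; fencing is scheduled when the connection is
 *     unrecoverable; otherwise ordered after the connection start elsewhere
 *   - demote: ordered after the connection start only while the connection
 *     is resting or in an unknown state
 *   - anything else: ordered after the connection start, with recurring
 *     monitors forced to restart when the connection is recovered
 */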
2242 
2243 static void
2244 apply_remote_node_ordering(pe_working_set_t *data_set)
2245 {
2246  if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
2247  return;
2248  }
2249 
2250  for (GList *gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2251  pe_action_t *action = (pe_action_t *) gIter->data;
2252  pe_resource_t *remote = NULL;
2253 
2254  // We are only interested in resource actions
2255  if (action->rsc == NULL) {
2256  continue;
2257  }
2258 
2259  /* Special case: If we are clearing the failcount of an actual
2260  * remote connection resource, then make sure this happens before
2261  * any start of the resource in this transition.
2262  */
2263  if (action->rsc->is_remote_node &&
2264  pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT, pcmk__str_casei)) {
2265 
2266  custom_action_order(action->rsc,
2267  NULL,
2268  action,
2269  action->rsc,
2270  pcmk__op_key(action->rsc->id, RSC_START, 0),
2271  NULL,
2272  pe_order_optional,
2273  data_set);
2274 
2275  continue;
2276  }
2277 
2278  // We are only interested in actions allocated to a node
2279  if (action->node == NULL) {
2280  continue;
2281  }
2282 
2283  if (!pe__is_guest_or_remote_node(action->node)) {
2284  continue;
2285  }
2286 
2287  /* We are only interested in real actions.
2288  *
2289  * @TODO This is probably wrong; pseudo-actions might be converted to
2290  * real actions and vice versa later in update_actions() at the end of
2291  * stage7().
2292  */
2293  if (pcmk_is_set(action->flags, pe_action_pseudo)) {
2294  continue;
2295  }
2296 
2297  remote = action->node->details->remote_rsc;
2298  if (remote == NULL) {
2299  // Orphaned
2300  continue;
2301  }
2302 
2303  /* Another special case: if a resource is moving to a Pacemaker Remote
2304  * node, order the stop on the original node after any start of the
2305  * remote connection. This ensures that if the connection fails to
2306  * start, we leave the resource running on the original node.
2307  */
2308  if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) {
2309  for (GList *item = action->rsc->actions; item != NULL;
2310  item = item->next) {
2311  pe_action_t *rsc_action = item->data;
2312 
2313  if ((rsc_action->node->details != action->node->details)
2314  && pcmk__str_eq(rsc_action->task, RSC_STOP, pcmk__str_casei)) {
2315  custom_action_order(remote, start_key(remote), NULL,
2316  action->rsc, NULL, rsc_action,
2317  pe_order_optional, data_set);
2318  }
2319  }
2320  }
2321 
2322  /* The action occurs across a remote connection, so create
2323  * ordering constraints that guarantee the action occurs while the node
2324  * is active (after start, before stop ... things like that).
2325  *
2326  * This is somewhat brittle in that we need to make sure the results of
2327  * this ordering are compatible with the result of get_router_node().
2328  * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
2329  * of this logic rather than action2xml().
2330  */
2331  if (remote->container) {
2332  crm_trace("Container ordering for %s", action->uuid);
2333  apply_container_ordering(action, data_set);
2334 
2335  } else {
2336  crm_trace("Remote ordering for %s", action->uuid);
2337  apply_remote_ordering(action, data_set);
2338  }
2339  }
2340 }
2341 
2342 static gboolean
2343 order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
2344 {
2345  /* There is no need to probe the resource on the node that is being
2346  * unfenced. Doing so could introduce a transition loop, because
2347  * the probe would be performed after the node is
2348  * unfenced.
2349  */
2350  if (pcmk__str_eq(rh_action->task, CRM_OP_FENCE, pcmk__str_casei)
2351  && probe->node && rh_action->node
2352  && probe->node->details == rh_action->node->details) {
2353  const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
2354 
2355  if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
2356  return TRUE;
2357  }
2358  }
2359 
2360  // Shutdown waits for probe to complete only if it's on the same node
2361  if ((pcmk__str_eq(rh_action->task, CRM_OP_SHUTDOWN, pcmk__str_casei))
2362  && probe->node && rh_action->node
2363  && probe->node->details != rh_action->node->details) {
2364  return TRUE;
2365  }
2366  return FALSE;
2367 }
2368 
2369 static void
2370 order_first_probes_imply_stops(pe_working_set_t * data_set)
2371 {
2372  GList *gIter = NULL;
2373 
2374  for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2375  pe__ordering_t *order = gIter->data;
2376  enum pe_ordering order_type = pe_order_optional;
2377 
2378  pe_resource_t *lh_rsc = order->lh_rsc;
2379  pe_resource_t *rh_rsc = order->rh_rsc;
2380  pe_action_t *lh_action = order->lh_action;
2381  pe_action_t *rh_action = order->rh_action;
2382  const char *lh_action_task = order->lh_action_task;
2383  const char *rh_action_task = order->rh_action_task;
2384 
2385  GList *probes = NULL;
2386  GList *rh_actions = NULL;
2387 
2388  GList *pIter = NULL;
2389 
2390  if (lh_rsc == NULL) {
2391  continue;
2392 
2393  } else if (rh_rsc && lh_rsc == rh_rsc) {
2394  continue;
2395  }
2396 
2397  if (lh_action == NULL && lh_action_task == NULL) {
2398  continue;
2399  }
2400 
2401  if (rh_action == NULL && rh_action_task == NULL) {
2402  continue;
2403  }
2404 
2405  /* Technically, a probe is expected to return "not running", which can
2406  * serve as an alternative to a stop action while the status of the
2407  * resource is still unknown.
2408  */
2409  if (lh_action && !pcmk__str_eq(lh_action->task, RSC_STOP, pcmk__str_casei)) {
2410  continue;
2411 
2412  } else if (lh_action == NULL
2413  && lh_action_task
2414  && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) {
2415  continue;
2416  }
2417 
2418  /* Do not probe a resource inside a stopping container. Doing so could
2419  * introduce a transition loop, because the probe would be performed
2420  * after the container starts again.
2421  */
2422  if (rh_rsc && lh_rsc->container == rh_rsc) {
2423  if (rh_action && pcmk__str_eq(rh_action->task, RSC_STOP, pcmk__str_casei)) {
2424  continue;
2425 
2426  } else if (rh_action == NULL && rh_action_task
2427  && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) {
2428  continue;
2429  }
2430  }
2431 
2432  if (order->type == pe_order_none) {
2433  continue;
2434  }
2435 
2436  // Preserve the order options for future filtering
2437  if (pcmk_is_set(order->type, pe_order_apply_first_non_migratable)) {
2438  pe__set_order_flags(order_type,
2439  pe_order_apply_first_non_migratable);
2440  }
2441 
2442  if (pcmk_is_set(order->type, pe_order_same_node)) {
2443  pe__set_order_flags(order_type, pe_order_same_node);
2444  }
2445 
2446  // Keep the order types for future filtering
2447  if (order->type == pe_order_anti_colocation
2448  || order->type == pe_order_load) {
2449  order_type = order->type;
2450  }
2451 
2452  probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE);
2453  if (probes == NULL) {
2454  continue;
2455  }
2456 
2457  if (rh_action) {
2458  rh_actions = g_list_prepend(rh_actions, rh_action);
2459 
2460  } else if (rh_rsc && rh_action_task) {
2461  rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
2462  }
2463 
2464  if (rh_actions == NULL) {
2465  g_list_free(probes);
2466  continue;
2467  }
2468 
2469  crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
2470  " (id=%d, type=%.6x)",
2471  lh_action ? lh_action->uuid : lh_action_task,
2472  rh_action ? rh_action->uuid : rh_action_task,
2473  order->id, order->type);
2474 
2475  for (pIter = probes; pIter != NULL; pIter = pIter->next) {
2476  pe_action_t *probe = (pe_action_t *) pIter->data;
2477  GList *rIter = NULL;
2478 
2479  for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
2480  pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;
2481 
2482  if (order_first_probe_unneeded(probe, rh_action_iter)) {
2483  continue;
2484  }
2485  order_actions(probe, rh_action_iter, order_type);
2486  }
2487  }
2488 
2489  g_list_free(rh_actions);
2490  g_list_free(probes);
2491  }
2492 }
2493 
2494 static void
2495 order_first_probe_then_restart_repromote(pe_action_t * probe,
2496  pe_action_t * after,
2497  pe_working_set_t * data_set)
2498 {
2499  GList *gIter = NULL;
2500  bool interleave = FALSE;
2501  pe_resource_t *compatible_rsc = NULL;
2502 
2503  if (probe == NULL
2504  || probe->rsc == NULL
2505  || probe->rsc->variant != pe_native) {
2506  return;
2507  }
2508 
2509  if (after == NULL
2510  // Avoid running into any possible loop
2511  || pcmk_is_set(after->flags, pe_action_tracking)) {
2512  return;
2513  }
2514 
2515  if (!pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
2516  return;
2517  }
2518 
2519  pe__set_action_flags(after, pe_action_tracking);
2520 
2521  crm_trace("Processing based on %s %s -> %s %s",
2522  probe->uuid,
2523  probe->node ? probe->node->details->uname: "",
2524  after->uuid,
2525  after->node ? after->node->details->uname : "");
2526 
2527  if (after->rsc
2528  /* Better not build a dependency directly with a clone/group.
2529  * We are going to proceed through the ordering chain and build
2530  * dependencies with its children.
2531  */
2532  && after->rsc->variant == pe_native
2533  && probe->rsc != after->rsc) {
2534 
2535  GList *then_actions = NULL;
2536  enum pe_ordering probe_order_type = pe_order_optional;
2537 
2538  if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
2539  then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE);
2540 
2541  } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
2542  then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE);
2543  }
2544 
2545  for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
2546  pe_action_t *then = (pe_action_t *) gIter->data;
2547 
2548  // Skip any pseudo action which for example is implied by fencing
2549  if (pcmk_is_set(then->flags, pe_action_pseudo)) {
2550  continue;
2551  }
2552 
2553  order_actions(probe, then, probe_order_type);
2554  }
2555  g_list_free(then_actions);
2556  }
2557 
2558  if (after->rsc
2559  && after->rsc->variant > pe_group) {
2560  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
2561  XML_RSC_ATTR_INTERLEAVE);
2562 
2563  interleave = crm_is_true(interleave_s);
2564 
2565  if (interleave) {
2566  /* For an interleaved clone, we should build a dependency only
2567  * with the relevant clone child.
2568  */
2569  compatible_rsc = find_compatible_child(probe->rsc,
2570  after->rsc,
2571  RSC_ROLE_UNKNOWN,
2572  FALSE, data_set);
2573  }
2574  }
2575 
2576  for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
2577  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data;
2578  /* pe_order_implies_then is the reason why a required A.start
2579  * implies/enforces B.start to be required too, which is the cause of
2580  * B.restart/re-promote.
2581  *
2582  * Not sure about pe_order_implies_then_on_node though. It's now only
2583  * used for unfencing case, which tends to introduce transition
2584  * loops...
2585  */
2586 
2587  if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
2588  /* The order type between a group/clone and its child such as
2589  * B.start-> B_child.start is:
2590  * pe_order_implies_first_printed | pe_order_runnable_left
2591  *
2592  * Proceed through the ordering chain and build dependencies with
2593  * its children.
2594  */
2595  if (after->rsc == NULL
2596  || after->rsc->variant < pe_group
2597  || probe->rsc->parent == after->rsc
2598  || after_wrapper->action->rsc == NULL
2599  || after_wrapper->action->rsc->variant > pe_group
2600  || after->rsc != after_wrapper->action->rsc->parent) {
2601  continue;
2602  }
2603 
2604  /* Proceed to the children of a group or a non-interleaved clone.
2605  * For an interleaved clone, proceed only to the relevant child.
2606  */
2607  if (after->rsc->variant > pe_group
2608  && interleave == TRUE
2609  && (compatible_rsc == NULL
2610  || compatible_rsc != after_wrapper->action->rsc)) {
2611  continue;
2612  }
2613  }
2614 
2615  crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
2616  after->uuid,
2617  after->node ? after->node->details->uname: "",
2618  after_wrapper->action->uuid,
2619  after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
2620  after_wrapper->type);
2621 
2622  order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2623  }
2624 }
2625 
2626 static void clear_actions_tracking_flag(pe_working_set_t * data_set)
2627 {
2628  GList *gIter = NULL;
2629 
2630  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2631  pe_action_t *action = (pe_action_t *) gIter->data;
2632 
2633  if (pcmk_is_set(action->flags, pe_action_tracking)) {
2634  pe__clear_action_flags(action, pe_action_tracking);
2635  }
2636  }
2637 }
2638 
2639 static void
2640 order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
2641 {
2642  GList *gIter = NULL;
2643  GList *probes = NULL;
2644 
2645  for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
2646  pe_resource_t * child = (pe_resource_t *) gIter->data;
2647 
2648  order_first_rsc_probes(child, data_set);
2649  }
2650 
2651  if (rsc->variant != pe_native) {
2652  return;
2653  }
2654 
2655  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
2656 
2657  for (gIter = probes; gIter != NULL; gIter= gIter->next) {
2658  pe_action_t *probe = (pe_action_t *) gIter->data;
2659  GList *aIter = NULL;
2660 
2661  for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
2662  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data;
2663 
2664  order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2665  clear_actions_tracking_flag(data_set);
2666  }
2667  }
2668 
2669  g_list_free(probes);
2670 }
2671 
2672 static void
2673 order_first_probes(pe_working_set_t * data_set)
2674 {
2675  GList *gIter = NULL;
2676 
2677  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2678  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2679 
2680  order_first_rsc_probes(rsc, data_set);
2681  }
2682 
2683  order_first_probes_imply_stops(data_set);
2684 }
2685 
2686 static void
2687 order_then_probes(pe_working_set_t * data_set)
2688 {
2689 #if 0
2690  GList *gIter = NULL;
2691 
2692  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2693  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2694 
2695  /* Given "A then B", we would prefer to wait for A to be
2696  * started before probing B.
2697  *
2698  * If A was a filesystem on which the binaries and data for B
2699  * lived, it would have been useful if the author of B's agent
2700  * could assume that A is running before B.monitor will be
2701  * called.
2702  *
2703  * However we can't _only_ probe once A is running, otherwise
2704  * we'd not detect the state of B if A could not be started
2705  * for some reason.
2706  *
2707  * In practice however, we cannot even do an opportunistic
2708  * version of this because B may be moving:
2709  *
2710  * B.probe -> B.start
2711  * B.probe -> B.stop
2712  * B.stop -> B.start
2713  * A.stop -> A.start
2714  * A.start -> B.probe
2715  *
2716  * So far so good, but if we add the result of this code:
2717  *
2718  * B.stop -> A.stop
2719  *
2720  * Then we get a loop:
2721  *
2722  * B.probe -> B.stop -> A.stop -> A.start -> B.probe
2723  *
2724  * We could kill the 'B.probe -> B.stop' dependency, but that
2725  * could mean stopping B "too" soon, because B.start must wait
2726  * for the probes to complete.
2727  *
2728  * Another option is to allow it only if A is a non-unique
2729  * clone with clone-max == node-max (since we'll never be
2730  * moving it). However, we could still be stopping one
2731  * instance at the same time as starting another.
2732 
2733  * The complexity of checking for allowed conditions combined
2734  * with the ever narrowing usecase suggests that this code
2735  * should remain disabled until someone gets smarter.
2736  */
2737  pe_action_t *start = NULL;
2738  GList *actions = NULL;
2739  GList *probes = NULL;
2740 
2741  actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);
2742 
2743  if (actions) {
2744  start = actions->data;
2745  g_list_free(actions);
2746  }
2747 
2748  if(start == NULL) {
2749  crm_err("No start action for %s", rsc->id);
2750  continue;
2751  }
2752 
2753  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
2754 
2755  for (actions = start->actions_before; actions != NULL; actions = actions->next) {
2756  pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;
2757 
2758  GList *pIter = NULL;
2759  pe_action_t *first = before->action;
2760  pe_resource_t *first_rsc = first->rsc;
2761 
2762  if(first->required_runnable_before) {
2763  GList *clone_actions = NULL;
2764  for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
2765  before = (pe_action_wrapper_t *) clone_actions->data;
2766 
2767  crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);
2768 
2769  CRM_ASSERT(before->action->rsc);
2770  first_rsc = before->action->rsc;
2771  break;
2772  }
2773 
2774  } else if(!pcmk__str_eq(first->task, RSC_START, pcmk__str_casei)) {
2775  crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
2776  }
2777 
2778  if(first_rsc == NULL) {
2779  continue;
2780 
2781  } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
2782  crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
2783  continue;
2784 
2785  } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
2786  crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
2787  continue;
2788  }
2789 
2790  crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);
2791 
2792  for (pIter = probes; pIter != NULL; pIter = pIter->next) {
2793  pe_action_t *probe = (pe_action_t *) pIter->data;
2794 
2795  crm_err("Ordering %s before %s", first->uuid, probe->uuid);
2796  order_actions(first, probe, pe_order_optional);
2797  }
2798  }
2799  }
2800 #endif
2801 }
2802 
2803 static void
2804 order_probes(pe_working_set_t * data_set)
2805 {
2806  order_first_probes(data_set);
2807  order_then_probes(data_set);
2808 }
2809 
2810 gboolean
2811 stage7(pe_working_set_t * data_set)
2812 {
2813  pcmk__output_t *prev_out = data_set->priv;
2814  pcmk__output_t *out = NULL;
2815  GList *gIter = NULL;
2816 
2817  crm_trace("Applying ordering constraints");
2818 
2819  /* Don't ask me why, but apparently they need to be processed in
2820  * the order they were created in... go figure
2821  *
2822  * Also g_list_append() has horrendous performance characteristics
2823  * So we need to use g_list_prepend() and then reverse the list here
2824  */
2825  data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
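/* Editorial note (not part of the original source): a minimal sketch of the
 * prepend-then-reverse pattern referenced above, since g_list_prepend() is
 * O(1) per insertion while g_list_append() is O(n):
 *
 *   GList *list = NULL;
 *   list = g_list_prepend(list, item1);   // O(1)
 *   list = g_list_prepend(list, item2);   // O(1)
 *   list = g_list_reverse(list);          // one pass restores creation order
 */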
2826 
2827  for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
2828  pe__ordering_t *order = gIter->data;
2829  pe_resource_t *rsc = order->lh_rsc;
2830 
2831  crm_trace("Applying ordering constraint: %d", order->id);
2832 
2833  if (rsc != NULL) {
2834  crm_trace("rsc_action-to-*");
2835  rsc_order_first(rsc, order, data_set);
2836  continue;
2837  }
2838 
2839  rsc = order->rh_rsc;
2840  if (rsc != NULL) {
2841  crm_trace("action-to-rsc_action");
2842  rsc_order_then(order->lh_action, rsc, order);
2843 
2844  } else {
2845  crm_trace("action-to-action");
2846  order_actions(order->lh_action, order->rh_action, order->type);
2847  }
2848  }
2849 
2850  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2851  pe_action_t *action = (pe_action_t *) gIter->data;
2852 
2853  update_colo_start_chain(action, data_set);
2854  }
2855 
2856  crm_trace("Ordering probes");
2857  order_probes(data_set);
2858 
2859  crm_trace("Updating %d actions", g_list_length(data_set->actions));
2860  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2861  pe_action_t *action = (pe_action_t *) gIter->data;
2862 
2863  update_action(action, data_set);
2864  }
2865 
2866  // Check for invalid orderings
2867  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
2868  pe_action_t *action = (pe_action_t *) gIter->data;
2869  pe_action_wrapper_t *input = NULL;
2870 
2871  for (GList *input_iter = action->actions_before;
2872  input_iter != NULL; input_iter = input_iter->next) {
2873 
2874  input = (pe_action_wrapper_t *) input_iter->data;
2875  if (pcmk__ordering_is_invalid(action, input)) {
2876  input->type = pe_order_none;
2877  }
2878  }
2879  }
2880 
2881  /* stage7 only ever outputs to the log, so ignore whatever output object was
2882  * previously set and just log instead.
2883  */
2884  out = pcmk__new_logger();
2885  if (out == NULL) {
2886  return FALSE;
2887  }
2888 
2889  pcmk__output_set_log_level(out, LOG_NOTICE);
2890  data_set->priv = out;
2891 
2892  out->begin_list(out, NULL, NULL, "Actions");
2893  LogNodeActions(data_set);
2894 
2895  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2896  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2897 
2898  LogActions(rsc, data_set);
2899  }
2900 
2901  out->end_list(out);
2902  out->finish(out, CRM_EX_OK, true, NULL);
2903  pcmk__output_free(out);
2904 
2905  data_set->priv = prev_out;
2906  return TRUE;
2907 }
2908 
2909 static int transition_id = -1;
2910 
2917 void
2918 pcmk__log_transition_summary(const char *filename)
2919 {
2920  if (was_processing_error) {
2921  crm_err("Calculated transition %d (with errors)%s%s",
2922  transition_id,
2923  (filename == NULL)? "" : ", saving inputs in ",
2924  (filename == NULL)? "" : filename);
2925 
2926  } else if (was_processing_warning) {
2927  crm_warn("Calculated transition %d (with warnings)%s%s",
2928  transition_id,
2929  (filename == NULL)? "" : ", saving inputs in ",
2930  (filename == NULL)? "" : filename);
2931 
2932  } else {
2933  crm_notice("Calculated transition %d%s%s",
2934  transition_id,
2935  (filename == NULL)? "" : ", saving inputs in ",
2936  (filename == NULL)? "" : filename);
2937  }
2938  if (crm_config_error) {
2939  crm_notice("Configuration errors found during scheduler processing,"
2940  " please run \"crm_verify -L\" to identify issues");
2941  }
2942 }
2943 
2944 /*
2945  * Create a dependency graph to send to the transitioner (via the controller)
2946  */
2947 gboolean
2948 stage8(pe_working_set_t * data_set)
2949 {
2950  GList *gIter = NULL;
2951  const char *value = NULL;
2952  long long limit = 0LL;
2953 
2954  transition_id++;
2955  crm_trace("Creating transition graph %d.", transition_id);
2956 
2957  data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
2958 
2959  value = pe_pref(data_set->config_hash, "cluster-delay");
2960  crm_xml_add(data_set->graph, "cluster-delay", value);
2961 
2962  value = pe_pref(data_set->config_hash, "stonith-timeout");
2963  crm_xml_add(data_set->graph, "stonith-timeout", value);
2964 
2965  crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
2966 
2967  if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) {
2968  crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
2969  } else {
2970  crm_xml_add(data_set->graph, "failed-start-offset", "1");
2971  }
2972 
2973  value = pe_pref(data_set->config_hash, "batch-limit");
2974  crm_xml_add(data_set->graph, "batch-limit", value);
2975 
2976  crm_xml_add_int(data_set->graph, "transition_id", transition_id);
2977 
2978  value = pe_pref(data_set->config_hash, "migration-limit");
2979  if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
2980  crm_xml_add(data_set->graph, "migration-limit", value);
2981  }
2982 
2983  if (data_set->recheck_by > 0) {
2984  char *recheck_epoch = NULL;
2985 
2986  recheck_epoch = crm_strdup_printf("%llu",
2987  (long long) data_set->recheck_by);
2988  crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
2989  free(recheck_epoch);
2990  }
2991 
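/* Editorial note (not part of the original source): the attributes added
 * above give the XML_TAG_GRAPH element roughly this shape (values are
 * illustrative only):
 *
 *   <transition_graph cluster-delay="60s" stonith-timeout="60s"
 *                     failed-stop-offset="INFINITY"
 *                     failed-start-offset="INFINITY" batch-limit="0"
 *                     transition_id="42" migration-limit="5"
 *                     recheck-by="1625000000"/>
 */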
2992 /* errors...
2993  slist_iter(action, pe_action_t, action_list, lpc,
2994  if(action->optional == FALSE && action->runnable == FALSE) {
2995  print_action("Ignoring", action, TRUE);
2996  }
2997  );
2998 */
2999 
3000  /* The following code will de-duplicate action inputs, so nothing past this
3001  * should rely on the action input type flags retaining their original
3002  * values.
3003  */
3004 
3005  gIter = data_set->resources;
3006  for (; gIter != NULL; gIter = gIter->next) {
3007  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
3008 
3009  pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
3010  rsc->cmds->expand(rsc, data_set);
3011  }
3012 
3013  crm_log_xml_trace(data_set->graph, "created resource-driven action list");
3014 
3015  /* pseudo action to distribute list of nodes with maintenance state update */
3016  add_maintenance_update(data_set);
3017 
3018  /* catch any non-resource specific actions */
3019  crm_trace("processing non-resource actions");
3020 
3021  gIter = data_set->actions;
3022  for (; gIter != NULL; gIter = gIter->next) {
3023  pe_action_t *action = (pe_action_t *) gIter->data;
3024 
3025  if (action->rsc
3026  && action->node
3027  && action->node->details->shutdown
3028  && !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)
3029  && !pcmk_any_flags_set(action->flags,
3031  && pcmk__str_eq(action->task, RSC_STOP, pcmk__str_none)
3032  ) {
3033  /* Eventually we should just ignore the 'fence' case
3034  * But for now it's the best way to detect (in CTS) when
3035  * CIB resource updates are being lost
3036  */
3037  if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)
3038  || data_set->no_quorum_policy == no_quorum_ignore) {
3039  crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
3040  action->node->details->unclean ? "fence" : "shut down",
3041  action->node->details->uname, action->rsc->id,
3042  pcmk_is_set(action->rsc->flags, pe_rsc_managed)? " blocked" : " unmanaged",
3043  pcmk_is_set(action->rsc->flags, pe_rsc_failed)? " failed" : "",
3044  action->uuid);
3045  }
3046  }
3047 
3048  graph_element_from_action(action, data_set);
3049  }
3050 
3051  crm_log_xml_trace(data_set->graph, "created generic action list");
3052  crm_trace("Created transition graph %d.", transition_id);
3053 
3054  return TRUE;
3055 }
3056 
3057 void
3058 LogNodeActions(pe_working_set_t * data_set)
3059 {
3060  pcmk__output_t *out = data_set->priv;
3061  GList *gIter = NULL;
3062 
3063  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
3064  char *node_name = NULL;
3065  char *task = NULL;
3066  pe_action_t *action = (pe_action_t *) gIter->data;
3067 
3068  if (action->rsc != NULL) {
3069  continue;
3070  } else if (pcmk_is_set(action->flags, pe_action_optional)) {
3071  continue;
3072  }
3073 
3074  if (pe__is_guest_node(action->node)) {
3075  node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
3076  } else if(action->node) {
3077  node_name = crm_strdup_printf("%s", action->node->details->uname);
3078  }
3079 
3080 
3081  if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
3082  task = strdup("Shutdown");
3083  } else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_casei)) {
3084  const char *op = g_hash_table_lookup(action->meta, "stonith_action");
3085  task = crm_strdup_printf("Fence (%s)", op);
3086  }
3087 
3088  out->message(out, "node-action", task, node_name, action->reason);
3089 
3090  free(node_name);
3091  free(task);
3092  }
3093 }
pe_action_t * pe_cancel_op(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set)
void pe__foreach_param_check(pe_working_set_t *data_set, void(*cb)(pe_resource_t *, pe_node_t *, xmlNode *, enum pe_check_parameters, pe_working_set_t *))
Definition: remote.c:246
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:218
#define pe__clear_action_flags_as(function, line, action, flags_to_clear)
Definition: internal.h:102
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:446
void group_append_meta(pe_resource_t *rsc, xmlNode *xml)
enum pe_action_flags clone_action_flags(pe_action_t *action, pe_node_t *node)
pe_action_t * lh_action
Definition: internal.h:182
enum pe_quorum_policy no_quorum_policy
Definition: pe_types.h:149
#define RSC_STOP
Definition: crm.h:204
void clone_append_meta(pe_resource_t *rsc, xmlNode *xml)
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
A dumping ground.
gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
Definition: operations.c:185
#define crm_notice(fmt, args...)
Definition: logging.h:352
#define CRMD_ACTION_MIGRATED
Definition: crm.h:174
#define pe_flag_stop_action_orphans
Definition: pe_types.h:104
bool pe__is_guest_or_remote_node(const pe_node_t *node)
Definition: remote.c:41
GHashTable * attrs
Definition: pe_types.h:234
gboolean(* create_probe)(pe_resource_t *, pe_node_t *, pe_action_t *, gboolean, pe_working_set_t *)
enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node)
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:19
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK
Definition: msg_xml.h:387
#define crm_crit(fmt, args...)
Definition: logging.h:349
char data[0]
Definition: cpg.c:55
#define INFINITY
Definition: crm.h:99
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:59
gboolean stage2(pe_working_set_t *data_set)
gboolean stage5(pe_working_set_t *data_set)
#define pe__show_node_weights(level, rsc, text, nodes, data_set)
Definition: internal.h:371
#define CRM_OP_FENCE
Definition: crm.h:145
gboolean stage3(pe_working_set_t *data_set)
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:398
enum rsc_role_e(* state)(const pe_resource_t *, gboolean)
Definition: pe_types.h:53
GList * sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set)
pe_check_parameters
Definition: pe_types.h:195
#define XML_TAG_GRAPH
Definition: msg_xml.h:325
#define stop_action(rsc, node, optional)
Definition: internal.h:395
void complex_set_cmds(pe_resource_t *rsc)
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set)
Definition: remote.c:220
pe_resource_t * container
Definition: pe_types.h:379
bool pcmk__strcase_any_of(const char *s,...) G_GNUC_NULL_TERMINATED
Definition: strings.c:929
pe_node_t * partial_migration_source
Definition: pe_types.h:364
#define pe_flag_concurrent_fencing
Definition: pe_types.h:101
#define XML_ATTR_TYPE
Definition: msg_xml.h:132
void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
Definition: utils.c:2321
void pcmk__output_set_log_level(pcmk__output_t *out, int log_level)
Definition: output_log.c:303
void pcmk__log_transition_summary(const char *filename)
#define XML_CIB_TAG_CONSTRAINTS
Definition: msg_xml.h:183
#define CRM_OP_REPROBE
Definition: crm.h:154
GList * children
Definition: pe_types.h:376
resource_alloc_functions_t * cmds
Definition: pe_types.h:332
#define pe_flag_symmetric_cluster
Definition: pe_types.h:95
void native_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
bool pcmk__ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input)
GList * find_actions(GList *input, const char *key, const pe_node_t *on_node)
Definition: utils.c:1452
xmlNode * get_object_root(const char *object_type, xmlNode *the_root)
Definition: cib_utils.c:146
#define pe_flag_no_compat
Definition: pe_types.h:131
gboolean stage0(pe_working_set_t *data_set)
xmlNode * xml
Definition: pe_types.h:322
pe_resource_t * rsc
Definition: pe_types.h:409
enum rsc_role_e next_role
Definition: pe_types.h:370
void add_maintenance_update(pe_working_set_t *data_set)
const char * crm_xml_add_int(xmlNode *node, const char *name, int value)
Create an XML attribute with specified name and integer value.
Definition: nvpair.c:432
gboolean exclusive_discover
Definition: pe_types.h:351
int char2score(const char *score)
Definition: utils.c:61
pe_resource_t * remote_rsc
Definition: pe_types.h:230
pe_action_t * sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set)
resource_alloc_functions_t resource_class_alloc_functions[]
GHashTable * meta
Definition: pe_types.h:372
#define pe_rsc_unique
Definition: pe_types.h:254
resource_object_functions_t * fns
Definition: pe_types.h:331
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:264
void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite)
Definition: utils.c:2214
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:324
remote_connection_state
void ReloadRsc(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set)
bool pe__bundle_needs_remote_name(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: bundle.c:949
int(* message)(pcmk__output_t *out, const char *message_id,...)
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1632
gboolean unpack_constraints(xmlNode *xml_constraints, pe_working_set_t *data_set)
#define pe_flag_have_status
Definition: pe_types.h:116
void group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
enum action_tasks text2task(const char *task)
Definition: common.c:354
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1749
GList * actions
Definition: pe_types.h:164
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:308
pe_action_t * rh_action
Definition: internal.h:187
void resource_location(pe_resource_t *rsc, pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1588
xmlNode * params_restart
Definition: internal.h:499
enum pe_graph_flags native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
void native_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
enum crm_ais_msg_types type
Definition: cpg.c:48
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:262
void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
pe_resource_t * rsc_lh
Definition: internal.h:170
pe_node_t * partial_migration_target
Definition: pe_types.h:363
#define RSC_START
Definition: crm.h:201
int migration_threshold
Definition: pe_types.h:343
pe_node_t * allocated_to
Definition: pe_types.h:362
pe_action_t * action
Definition: pe_types.h:533
GHashTable * pcmk__native_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags)
gboolean remote_was_fenced
Definition: pe_types.h:225
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:535
gboolean can_run_resources(const pe_node_t *node)
#define pe_flag_have_quorum
Definition: pe_types.h:94
void pcmk__bundle_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
#define CRM_SCORE_INFINITY
Definition: crm.h:85
gboolean remote_requires_reset
Definition: pe_types.h:224
char * reason
Definition: pe_types.h:416
const char * action
Definition: pcmk_fence.c:30
#define pe__set_resource_flags(resource, flags_to_set)
Definition: internal.h:47
GList * resources
Definition: pe_types.h:158
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:2143
pe_node_t * lock_node
Definition: pe_types.h:383
GList * nodes
Definition: pe_types.h:157
#define CRM_TRACE_INIT_DATA(name)
Definition: logging.h:143
#define CRMD_ACTION_START
Definition: crm.h:176
gboolean is_dc
Definition: pe_types.h:221
bool pe__is_remote_node(const pe_node_t *node)
Definition: remote.c:25
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:297
void clone_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
pe__location_t * rsc2node_new(const char *id, pe_resource_t *rsc, int weight, const char *discovery_mode, pe_node_t *node, pe_working_set_t *data_set)
void native_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
#define CRMD_ACTION_STOP
Definition: crm.h:179
void group_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
#define CRM_OP_CLEAR_FAILCOUNT
Definition: crm.h:155
#define pe_warn(fmt...)
Definition: internal.h:27
int weight
Definition: pe_types.h:241
gboolean crm_config_error
Definition: utils.c:52
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition: strings.c:97
#define pe_flag_have_remote_nodes
Definition: pe_types.h:117
#define crm_warn(fmt, args...)
Definition: logging.h:351
void(* end_list)(pcmk__output_t *out)
guint remote_reconnect_ms
Definition: pe_types.h:344
void native_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
void(* create_actions)(pe_resource_t *, pe_working_set_t *)
void clone_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
pe_action_flags
Definition: pe_types.h:291
void native_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val, guint *result)
Definition: strings.c:311
int rc
Definition: pcmk_fence.c:35
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:623
#define pe_rsc_failed
Definition: pe_types.h:267
#define crm_debug(fmt, args...)
Definition: logging.h:355
gboolean stage8(pe_working_set_t *data_set)
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:903
#define pe_flag_sanitized
Definition: pe_types.h:120
#define pe__clear_order_flags(order_flags, flags_to_clear)
Definition: internal.h:118
pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define XML_CIB_ATTR_SHUTDOWN
Definition: msg_xml.h:283
void(* begin_list)(pcmk__output_t *out, const char *singular_noun, const char *plural_noun, const char *format,...) G_GNUC_PRINTF(4
pcmk__output_t * pcmk__new_logger(void)
#define XML_ATTR_ID
Definition: msg_xml.h:129
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:530
void clone_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:140
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:198
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
#define stop_key(rsc)
Definition: internal.h:394
enum pe_graph_flags group_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
#define pe_rsc_start_pending
Definition: pe_types.h:269
char * task
Definition: pe_types.h:413
gboolean stage4(pe_working_set_t *data_set)
GList * actions_after
Definition: pe_types.h:447
gboolean update_action(pe_action_t *action, pe_working_set_t *data_set)
#define pe__clear_action_flags(action, flags_to_clear)
Definition: internal.h:68
int custom_action_order(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set)
#define crm_trace(fmt, args...)
Definition: logging.h:356
enum rsc_digest_cmp_val rc
Definition: internal.h:496
void pe_fence_node(pe_working_set_t *data_set, pe_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:97
char * digest_secure_calc
Definition: internal.h:501
void calculate_active_ops(GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2253
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
int(* info)(pcmk__output_t *out, const char *format,...) G_GNUC_PRINTF(2
GHashTable * meta
Definition: pe_types.h:423
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:114
struct pe_node_shared_s * details
Definition: pe_types.h:244
pe_node_t * node
Definition: pe_types.h:410
bool pe__shutdown_requested(pe_node_t *node)
Definition: utils.c:2306
#define XML_AGENT_ATTR_PROVIDER
Definition: msg_xml.h:267
bool pcmk__is_daemon
Definition: logging.c:47
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1802
unsigned long long flags
Definition: pe_types.h:347
const char * uname
Definition: pe_types.h:209
GHashTable * pcmk__group_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags)
Wrappers for and extensions to libxml2.
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:401
GHashTable * config_hash
Definition: pe_types.h:151
char * clone_name
Definition: pe_types.h:321
xmlNode * find_rsc_op_entry(pe_resource_t *rsc, const char *key)
Definition: utils.c:1321
void(* expand)(pe_resource_t *, pe_working_set_t *)
pe_resource_t * lh_rsc
Definition: internal.h:181
void LogActions(pe_resource_t *rsc, pe_working_set_t *data_set)
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:696
time_t lock_time
Definition: pe_types.h:384
time_t recheck_by
Definition: pe_types.h:187
#define pe_flag_stonith_enabled
Definition: pe_types.h:98
#define PCMK__NELEM(a)
Definition: internal.h:38
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:635
GList * actions
Definition: pe_types.h:358
gboolean native_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
enum pe_ordering type
Definition: internal.h:178
char * uuid
Definition: pe_types.h:414
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:313
gboolean update_action_flags(pe_action_t *action, enum pe_action_flags flags, const char *source, int line)
void group_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
enum pe_obj_types variant
Definition: pe_types.h:329
gboolean xml_has_children(const xmlNode *root)
Definition: xml.c:2027
xmlNode * input
Definition: pe_types.h:137
void pcmk__bundle_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
const char * placement_strategy
Definition: pe_types.h:144
void LogNodeActions(pe_working_set_t *data_set)
int rsc_discover_mode
Definition: pe_types.h:245
xmlNode * params_all
Definition: internal.h:497
#define CRM_OP_SHUTDOWN
Definition: crm.h:144
void(* rsc_location)(pe_resource_t *, pe__location_t *)
void pe__free_param_checks(pe_working_set_t *data_set)
Definition: remote.c:261
const char * id
Definition: pe_types.h:208
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: operations.c:45
gboolean pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
guint shutdown_lock
Definition: pe_types.h:189
void pcmk__output_free(pcmk__output_t *out)
Definition: output.c:19
pe_node_t * pe_find_node_id(GList *node_list, const char *id)
Definition: status.c:418
bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node)
Definition: utils.c:90
int pcmk__score_green
Definition: utils.c:57
pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set)
#define PCMK_RESOURCE_CLASS_STONITH
Definition: services.h:49
gboolean rsc_discovery_enabled
Definition: pe_types.h:223
#define XML_LRM_ATTR_SECURE_DIGEST
Definition: msg_xml.h:314
void group_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
enum pe_action_flags group_action_flags(pe_action_t *action, pe_node_t *node)
void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set)
gboolean stage7(pe_working_set_t *data_set)
Cluster status and scheduling.
gboolean is_remote_node
Definition: pe_types.h:350
GList * ordering_constraints
Definition: pe_types.h:160
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:579
#define pe_flag_show_utilization
Definition: pe_types.h:134
void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
int pcmk__score_red
Definition: utils.c:56
void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:263
int pe__add_scores(int score1, int score2)
Definition: common.c:516
#define crm_err(fmt, args...)
Definition: logging.h:350
#define CRM_ASSERT(expr)
Definition: results.h:42
void(* finish)(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_dest)
#define RSC_STATUS
Definition: crm.h:215
pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:364
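A hedged sketch of how scheduler code might call pe__clear_failcount(); the helper name and the reason string are hypothetical, and rsc, node and data_set are assumed to come from an already-populated working set (e.g. after cluster_status()):

#include <crm_internal.h>            /* project-internal headers */
#include <crm/pengine/internal.h>    /* pe__clear_failcount(), pe_rsc_trace() */

/* Hypothetical helper: queue a fail-count clearing operation for one
 * resource on one node and log the action that was created.
 */
static void
clear_stale_failcount(pe_resource_t *rsc, pe_node_t *node,
                      pe_working_set_t *data_set)
{
    pe_action_t *clear_op = pe__clear_failcount(rsc, node,
                                                "example: failure expired",
                                                data_set);

    pe_rsc_trace(rsc, "Scheduled %s on %s",
                 clear_op->uuid, node->details->uname);
}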
Cluster Configuration.
op_digest_cache_t * rsc_action_digest_cmp(pe_resource_t *rsc, xmlNode *xml_op, pe_node_t *node, pe_working_set_t *data_set)
Definition: pe_digest.c:392
#define RSC_PROMOTE
Definition: crm.h:207
gboolean cluster_status(pe_working_set_t *data_set)
Definition: status.c:71
This structure contains everything that makes up a single output formatter.
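A sketch of how code drives a pcmk__output_t formatter, using only the members listed in this reference (info, finish, pcmk__output_free); the helper name is hypothetical, and the formatter is assumed to have been created elsewhere (for example with pcmk__output_new(), not shown here):

#include <stdbool.h>
#include <crm_internal.h>                 /* project-internal headers */
#include <crm/common/results.h>           /* CRM_EX_OK */
#include <crm/common/output_internal.h>   /* pcmk__output_t, pcmk__output_free() */

/* Hypothetical helper: emit one informational line through the caller's
 * formatter, flush it, then release it.
 */
static void
report_and_close(pcmk__output_t *out)
{
    out->info(out, "Allocation finished with %d warnings", 0);
    out->finish(out, CRM_EX_OK, true /* print */, NULL /* no copy */);
    pcmk__output_free(out);
}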
int pcmk__score_yellow
Definition: utils.c:58
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:295
gboolean stage6(pe_working_set_t *data_set)
#define crm_log_xml_info(xml, text)
Definition: logging.h:362
void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:309
#define CRMD_ACTION_MIGRATE
Definition: crm.h:173
void(* internal_constraints)(pe_resource_t *, pe_working_set_t *)
gboolean shutdown
Definition: pe_types.h:219
#define crm_str(x)
Definition: logging.h:376
void pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean clone_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
void native_append_meta(pe_resource_t *rsc, xmlNode *xml)
GList * running_on
Definition: pe_types.h:365
enum pe_action_flags flags
Definition: pe_types.h:418
gboolean maintenance
Definition: pe_types.h:222
#define CRM_OP_PROBED
Definition: crm.h:153
#define pe_rsc_maintenance
Definition: pe_types.h:276
#define pe_rsc_failure_ignored
Definition: pe_types.h:275
GList * placement_constraints
Definition: pe_types.h:159
pe_working_set_t * cluster
Definition: pe_types.h:326
pe_resource_t * rh_rsc
Definition: internal.h:186
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:179
bool pe__resource_is_remote_conn(const pe_resource_t *rsc, const pe_working_set_t *data_set)
Definition: remote.c:17
#define crm_log_xml_trace(xml, text)
Definition: logging.h:364
gboolean crm_is_true(const char *s)
Definition: strings.c:415
bool pcmk__starts_with(const char *str, const char *prefix)
Check whether a string starts with a certain sequence.
Definition: strings.c:483
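A minimal sketch of pcmk__starts_with(); the function is project-internal, so this only builds inside the source tree, and the operation key shown is hypothetical:

#include <stdio.h>
#include <stdbool.h>
#include <crm_internal.h>   /* project-internal; pcmk__starts_with() is not public API */

int
main(void)
{
    const char *op_key = "my-ip_monitor_10000";   /* hypothetical operation key */

    if (pcmk__starts_with(op_key, "my-ip_")) {
        printf("%s belongs to my-ip\n", op_key);
    }
    return 0;
}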
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:265
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:20
#define pe__set_order_flags(order_flags, flags_to_set)
Definition: internal.h:111
#define start_key(rsc)
Definition: internal.h:400
#define ID(x)
Definition: msg_xml.h:456
unsigned long long flags
Definition: pe_types.h:146
#define pe_err(fmt...)
Definition: internal.h:22
gboolean was_processing_error
Definition: common.c:20
int stickiness
Definition: pe_types.h:340
#define XML_RSC_ATTR_INTERLEAVE
Definition: msg_xml.h:224
#define PCMK__OP_FMT
Definition: internal.h:168
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of a given type for a resource.
Definition: utils.c:1540
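A hedged sketch of pe__resource_actions(); the helper name is hypothetical, the resource and node are assumed to come from an already-built working set, and the returned list is assumed to be a shallow copy, so only the list itself is freed, not the actions it points to:

#include <glib.h>
#include <crm/crm.h>                 /* RSC_STOP */
#include <crm_internal.h>            /* project-internal headers */
#include <crm/pengine/internal.h>    /* pe__resource_actions() */

/* Hypothetical helper: count the stop actions already created for a
 * resource on a particular node.
 */
static guint
count_stops_on_node(const pe_resource_t *rsc, const pe_node_t *node)
{
    GList *stops = pe__resource_actions(rsc, node, RSC_STOP, TRUE);
    guint n = g_list_length(stops);

    g_list_free(stops);
    return n;
}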
pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2381
gboolean was_processing_warning
Definition: common.c:21
void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean probe_resources(pe_working_set_t *data_set)
enum pe_ordering type
Definition: pe_types.h:531
gboolean unclean
Definition: pe_types.h:217
#define pe__clear_raw_action_flags(action_flags, action_name, flags_to_clear)
Definition: internal.h:84
#define pe_flag_start_failure_fatal
Definition: pe_types.h:107
enum node_type type
Definition: pe_types.h:210
gboolean DeleteRsc(pe_resource_t *rsc, pe_node_t *node, gboolean optional, pe_working_set_t *data_set)
void group_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
void pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
void graph_element_from_action(pe_action_t *action, pe_working_set_t *data_set)
#define crm_info(fmt, args...)
Definition: logging.h:353
enum pe_action_flags native_action_flags(pe_action_t *action, pe_node_t *node)
#define pe_rsc_managed
Definition: pe_types.h:249
#define pe_rsc_orphan
Definition: pe_types.h:248
enum pe_graph_flags pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
pe_action_t * find_first_action(GList *input, const char *uuid, const char *task, pe_node_t *on_node)
Definition: utils.c:1422
void pcmk__bundle_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
void set_alloc_actions(pe_working_set_t *data_set)
void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml)
pe_ordering
Definition: pe_types.h:482
gboolean online
Definition: pe_types.h:213
uint64_t flags
Definition: remote.c:149
GList * actions_before
Definition: pe_types.h:446
int pe_get_failcount(pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:251
action_tasks
Definition: common.h:62
pe_resource_t * parent
Definition: pe_types.h:327
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: utils.c:2035
void group_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, pcmk__colocation_t *constraint, pe_working_set_t *data_set)
#define pe_flag_shutdown_lock
Definition: pe_types.h:113
#define RSC_DEMOTE
Definition: crm.h:209
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:18
pe_node_t * pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define XML_AGENT_ATTR_CLASS
Definition: msg_xml.h:266
xmlNode * graph
Definition: pe_types.h:176
char * id
Definition: pe_types.h:320
GHashTable * allowed_nodes
Definition: pe_types.h:367
pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:415
#define RSC_MIGRATED
Definition: crm.h:199
#define pe_flag_startup_probes
Definition: pe_types.h:115
pe_node_t * pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define pe_flag_stop_rsc_orphans
Definition: pe_types.h:103
#define pe__set_action_flags_as(function, line, action, flags_to_set)
Definition: internal.h:93