pacemaker  2.0.4-2deceaa
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
pcmk_sched_allocate.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2020 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <sys/param.h>
13 
14 #include <crm/crm.h>
15 #include <crm/cib.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 
19 #include <glib.h>
20 
21 #include <crm/pengine/status.h>
22 #include <pacemaker-internal.h>
23 
24 CRM_TRACE_INIT_DATA(pacemaker);
25 
26 void set_alloc_actions(pe_working_set_t * data_set);
27 extern void ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set);
28 extern gboolean DeleteRsc(pe_resource_t * rsc, pe_node_t * node, gboolean optional, pe_working_set_t * data_set);
29 static void apply_remote_node_ordering(pe_working_set_t *data_set);
30 static enum remote_connection_state get_remote_node_state(pe_node_t *node);
31 
38 };
39 
40 static const char *
41 state2text(enum remote_connection_state state)
42 {
43  switch (state) {
45  return "unknown";
46  case remote_state_alive:
47  return "alive";
49  return "resting";
51  return "failed";
53  return "stopped";
54  }
55 
56  return "impossible";
57 }
58 
60  {
73  },
74  {
87  },
88  {
101  },
102  {
115  }
116 };
117 
118 gboolean
119 update_action_flags(pe_action_t * action, enum pe_action_flags flags, const char *source, int line)
120 {
121  static unsigned long calls = 0;
122  gboolean changed = FALSE;
123  gboolean clear = is_set(flags, pe_action_clear);
124  enum pe_action_flags last = action->flags;
125 
126  if (clear) {
127  action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags);
128  } else {
129  action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags);
130  }
131 
132  if (last != action->flags) {
133  calls++;
134  changed = TRUE;
135  /* Useful for tracking down _who_ changed a specific flag */
136  /* CRM_ASSERT(calls != 534); */
137  clear_bit(flags, pe_action_clear);
138  crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
139  action->uuid, action->node ? action->node->details->uname : "[none]",
140  clear ? "un-" : "", flags, last, action->flags, calls, source);
141  }
142 
143  return changed;
144 }
145 
146 static gboolean
147 check_rsc_parameters(pe_resource_t * rsc, pe_node_t * node, xmlNode * rsc_entry,
148  gboolean active_here, pe_working_set_t * data_set)
149 {
150  int attr_lpc = 0;
151  gboolean force_restart = FALSE;
152  gboolean delete_resource = FALSE;
153  gboolean changed = FALSE;
154 
155  const char *value = NULL;
156  const char *old_value = NULL;
157 
158  const char *attr_list[] = {
162  };
163 
164  for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
165  value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
166  old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
167  if (value == old_value /* i.e. NULL */
168  || crm_str_eq(value, old_value, TRUE)) {
169  continue;
170  }
171 
172  changed = TRUE;
173  trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
174  if (active_here) {
175  force_restart = TRUE;
176  crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
177  rsc->id, node->details->uname, attr_list[attr_lpc],
178  crm_str(old_value), crm_str(value));
179  }
180  }
181  if (force_restart) {
182  /* make sure the restart happens */
183  stop_action(rsc, node, FALSE);
185  delete_resource = TRUE;
186 
187  } else if (changed) {
188  delete_resource = TRUE;
189  }
190  return delete_resource;
191 }
192 
193 static void
194 CancelXmlOp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * active_node,
195  const char *reason, pe_working_set_t * data_set)
196 {
197  guint interval_ms = 0;
198  pe_action_t *cancel = NULL;
199 
200  const char *task = NULL;
201  const char *call_id = NULL;
202 
203  CRM_CHECK(xml_op != NULL, return);
204  CRM_CHECK(active_node != NULL, return);
205 
206  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
207  call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
208  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
209 
210  crm_info("Action " PCMK__OP_FMT " on %s will be stopped: %s",
211  rsc->id, task, interval_ms,
212  active_node->details->uname, (reason? reason : "unknown"));
213 
214  cancel = pe_cancel_op(rsc, task, interval_ms, active_node, data_set);
215  add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
216  custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
217 }
218 
219 static gboolean
220 check_action_definition(pe_resource_t * rsc, pe_node_t * active_node, xmlNode * xml_op,
221  pe_working_set_t * data_set)
222 {
223  char *key = NULL;
224  guint interval_ms = 0;
225  const op_digest_cache_t *digest_data = NULL;
226  gboolean did_change = FALSE;
227 
228  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
229  const char *digest_secure = NULL;
230 
231  CRM_CHECK(active_node != NULL, return FALSE);
232 
233  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
234  if (interval_ms > 0) {
235  xmlNode *op_match = NULL;
236 
237  /* we need to reconstruct the key because of the way we used to construct resource IDs */
238  key = pcmk__op_key(rsc->id, task, interval_ms);
239 
240  pe_rsc_trace(rsc, "Checking parameters for %s", key);
241  op_match = find_rsc_op_entry(rsc, key);
242 
243  if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
244  CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
245  free(key);
246  return TRUE;
247 
248  } else if (op_match == NULL) {
249  pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
250  free(key);
251  return TRUE;
252  }
253  free(key);
254  key = NULL;
255  }
256 
257  crm_trace("Testing " PCMK__OP_FMT " on %s",
258  rsc->id, task, interval_ms, active_node->details->uname);
259  if ((interval_ms == 0) && safe_str_eq(task, RSC_STATUS)) {
260  /* Reload based on the start action not a probe */
261  task = RSC_START;
262 
263  } else if ((interval_ms == 0) && safe_str_eq(task, RSC_MIGRATED)) {
264  /* Reload based on the start action not a migrate */
265  task = RSC_START;
266  } else if ((interval_ms == 0) && safe_str_eq(task, RSC_PROMOTE)) {
267  /* Reload based on the start action not a promote */
268  task = RSC_START;
269  }
270 
271  digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
272 
273  if(is_set(data_set->flags, pe_flag_sanitized)) {
274  digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
275  }
276 
277  if(digest_data->rc != RSC_DIGEST_MATCH
278  && digest_secure
279  && digest_data->digest_secure_calc
280  && strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
281  if (is_set(data_set->flags, pe_flag_stdout)) {
282  printf("Only 'private' parameters to " PCMK__OP_FMT
283  " on %s changed: %s\n",
284  rsc->id, task, interval_ms, active_node->details->uname,
286  }
287 
288  } else if (digest_data->rc == RSC_DIGEST_RESTART) {
289  /* Changes that force a restart */
290  pe_action_t *required = NULL;
291 
292  did_change = TRUE;
293  key = pcmk__op_key(rsc->id, task, interval_ms);
294  crm_log_xml_info(digest_data->params_restart, "params:restart");
295  required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
296  pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
297  "resource definition change", pe_action_optional, TRUE);
298 
299  trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);
300 
301  } else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
302  /* Changes that can potentially be handled by a reload */
303  const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
304 
305  did_change = TRUE;
306  trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
307  crm_log_xml_info(digest_data->params_all, "params:reload");
308  key = pcmk__op_key(rsc->id, task, interval_ms);
309 
310  if (interval_ms > 0) {
311  pe_action_t *op = NULL;
312 
313 #if 0
314  /* Always reload/restart the entire resource */
315  ReloadRsc(rsc, active_node, data_set);
316 #else
317  /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
318  op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
320 #endif
321 
322  } else if (digest_restart) {
323  pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
324 
325  /* Reload this resource */
326  ReloadRsc(rsc, active_node, data_set);
327  free(key);
328 
329  } else {
330  pe_action_t *required = NULL;
331  pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);
332 
333  /* Re-send the start/demote/promote op
334  * Recurring ops will be detected independently
335  */
336  required = custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set);
337  pe_action_set_flag_reason(__FUNCTION__, __LINE__, required, NULL,
338  "resource definition change", pe_action_optional, TRUE);
339  }
340  }
341 
342  return did_change;
343 }
344 
351 static void
352 check_params(pe_resource_t *rsc, pe_node_t *node, xmlNode *rsc_op,
353  enum pe_check_parameters check, pe_working_set_t *data_set)
354 {
355  const char *reason = NULL;
356  op_digest_cache_t *digest_data = NULL;
357 
358  switch (check) {
359  case pe_check_active:
360  if (check_action_definition(rsc, node, rsc_op, data_set)
361  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
362  data_set)) {
363 
364  reason = "action definition changed";
365  }
366  break;
367 
369  digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, data_set);
370  switch (digest_data->rc) {
371  case RSC_DIGEST_UNKNOWN:
372  crm_trace("Resource %s history entry %s on %s has no digest to compare",
373  rsc->id, ID(rsc_op), node->details->id);
374  break;
375  case RSC_DIGEST_MATCH:
376  break;
377  default:
378  reason = "resource parameters have changed";
379  break;
380  }
381  break;
382  }
383 
384  if (reason) {
385  pe__clear_failcount(rsc, node, reason, data_set);
386  }
387 }
388 
389 static void
390 check_actions_for(xmlNode * rsc_entry, pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
391 {
392  GListPtr gIter = NULL;
393  int offset = -1;
394  int stop_index = 0;
395  int start_index = 0;
396 
397  const char *task = NULL;
398 
399  xmlNode *rsc_op = NULL;
400  GListPtr op_list = NULL;
401  GListPtr sorted_op_list = NULL;
402 
403  CRM_CHECK(node != NULL, return);
404 
405  if (is_set(rsc->flags, pe_rsc_orphan)) {
406  pe_resource_t *parent = uber_parent(rsc);
407  if(parent == NULL
408  || pe_rsc_is_clone(parent) == FALSE
409  || is_set(parent->flags, pe_rsc_unique)) {
410  pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
411  DeleteRsc(rsc, node, FALSE, data_set);
412  } else {
413  pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
414  }
415  return;
416 
417  } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
418  if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
419  DeleteRsc(rsc, node, FALSE, data_set);
420  }
421  pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
422  rsc->id, node->details->uname);
423  return;
424  }
425 
426  pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
427 
428  if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
429  DeleteRsc(rsc, node, FALSE, data_set);
430  }
431 
432  for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
433  rsc_op = __xml_next_element(rsc_op)) {
434 
435  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
436  op_list = g_list_prepend(op_list, rsc_op);
437  }
438  }
439 
440  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
441  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
442 
443  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
444  xmlNode *rsc_op = (xmlNode *) gIter->data;
445  guint interval_ms = 0;
446 
447  offset++;
448 
449  if (start_index < stop_index) {
450  /* stopped */
451  continue;
452  } else if (offset < start_index) {
453  /* action occurred prior to a start */
454  continue;
455  }
456 
457  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
458  crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
459 
460  if ((interval_ms > 0) &&
461  (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
462  // Maintenance mode cancels recurring operations
463  CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
464 
465  } else if ((interval_ms > 0)
466  || safe_str_eq(task, RSC_STATUS)
467  || safe_str_eq(task, RSC_START)
468  || safe_str_eq(task, RSC_PROMOTE)
469  || safe_str_eq(task, RSC_MIGRATED)) {
470 
471  /* If a resource operation failed, and the operation's definition
472  * has changed, clear any fail count so they can be retried fresh.
473  */
474 
475  if (pe__bundle_needs_remote_name(rsc)) {
476  /* We haven't allocated resources to nodes yet, so if the
477  * REMOTE_CONTAINER_HACK is used, we may calculate the digest
478  * based on the literal "#uname" value rather than the properly
479  * substituted value. That would mistakenly make the action
480  * definition appear to have been changed. Defer the check until
481  * later in this case.
482  */
483  pe__add_param_check(rsc_op, rsc, node, pe_check_active,
484  data_set);
485 
486  } else if (check_action_definition(rsc, node, rsc_op, data_set)
487  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
488  data_set)) {
489  pe__clear_failcount(rsc, node, "action definition changed",
490  data_set);
491  }
492  }
493  }
494  g_list_free(sorted_op_list);
495 }
496 
497 static GListPtr
498 find_rsc_list(GListPtr result, pe_resource_t * rsc, const char *id, gboolean renamed_clones,
499  gboolean partial, pe_working_set_t * data_set)
500 {
501  GListPtr gIter = NULL;
502  gboolean match = FALSE;
503 
504  if (id == NULL) {
505  return NULL;
506  }
507 
508  if (rsc == NULL) {
509  if (data_set == NULL) {
510  return NULL;
511  }
512  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
513  pe_resource_t *child = (pe_resource_t *) gIter->data;
514 
515  result = find_rsc_list(result, child, id, renamed_clones, partial,
516  NULL);
517  }
518  return result;
519  }
520 
521  if (partial) {
522  if (strstr(rsc->id, id)) {
523  match = TRUE;
524 
525  } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
526  match = TRUE;
527  }
528 
529  } else {
530  if (strcmp(rsc->id, id) == 0) {
531  match = TRUE;
532 
533  } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
534  match = TRUE;
535  }
536  }
537 
538  if (match) {
539  result = g_list_prepend(result, rsc);
540  }
541 
542  if (rsc->children) {
543  gIter = rsc->children;
544  for (; gIter != NULL; gIter = gIter->next) {
545  pe_resource_t *child = (pe_resource_t *) gIter->data;
546 
547  result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
548  }
549  }
550 
551  return result;
552 }
553 
554 static void
555 check_actions(pe_working_set_t * data_set)
556 {
557  const char *id = NULL;
558  pe_node_t *node = NULL;
559  xmlNode *lrm_rscs = NULL;
560  xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
561 
562  xmlNode *node_state = NULL;
563 
564  for (node_state = __xml_first_child_element(status); node_state != NULL;
565  node_state = __xml_next_element(node_state)) {
566  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
567  id = crm_element_value(node_state, XML_ATTR_ID);
568  lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
569  lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
570 
571  node = pe_find_node_id(data_set->nodes, id);
572 
573  if (node == NULL) {
574  continue;
575 
576  /* Still need to check actions for a maintenance node to cancel existing monitor operations */
577  } else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
578  crm_trace("Skipping param check for %s: can't run resources",
579  node->details->uname);
580  continue;
581  }
582 
583  crm_trace("Processing node %s", node->details->uname);
584  if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
585  xmlNode *rsc_entry = NULL;
586 
587  for (rsc_entry = __xml_first_child_element(lrm_rscs);
588  rsc_entry != NULL;
589  rsc_entry = __xml_next_element(rsc_entry)) {
590 
591  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
592 
593  if (xml_has_children(rsc_entry)) {
594  GListPtr gIter = NULL;
595  GListPtr result = NULL;
596  const char *rsc_id = ID(rsc_entry);
597 
598  CRM_CHECK(rsc_id != NULL, return);
599 
600  result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
601  for (gIter = result; gIter != NULL; gIter = gIter->next) {
602  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
603 
604  if (rsc->variant != pe_native) {
605  continue;
606  }
607  check_actions_for(rsc_entry, rsc, node, data_set);
608  }
609  g_list_free(result);
610  }
611  }
612  }
613  }
614  }
615  }
616 }
617 
618 static void
619 apply_placement_constraints(pe_working_set_t * data_set)
620 {
621  for (GList *gIter = data_set->placement_constraints;
622  gIter != NULL; gIter = gIter->next) {
623  pe__location_t *cons = gIter->data;
624 
625  cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
626  }
627 }
628 
629 static gboolean
630 failcount_clear_action_exists(pe_node_t * node, pe_resource_t * rsc)
631 {
632  gboolean rc = FALSE;
633  GList *list = pe__resource_actions(rsc, node, CRM_OP_CLEAR_FAILCOUNT, TRUE);
634 
635  if (list) {
636  rc = TRUE;
637  }
638  g_list_free(list);
639  return rc;
640 }
641 
650 static void
651 check_migration_threshold(pe_resource_t *rsc, pe_node_t *node,
652  pe_working_set_t *data_set)
653 {
654  int fail_count, countdown;
655  pe_resource_t *failed;
656 
657  /* Migration threshold of 0 means never force away */
658  if (rsc->migration_threshold == 0) {
659  return;
660  }
661 
662  // If we're ignoring failures, also ignore the migration threshold
663  if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
664  return;
665  }
666 
667  /* If there are no failures, there's no need to force away */
668  fail_count = pe_get_failcount(node, rsc, NULL,
670  data_set);
671  if (fail_count <= 0) {
672  return;
673  }
674 
675  /* How many more times recovery will be tried on this node */
676  countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);
677 
678  /* If failed resource has a parent, we'll force the parent away */
679  failed = rsc;
680  if (is_not_set(rsc->flags, pe_rsc_unique)) {
681  failed = uber_parent(rsc);
682  }
683 
684  if (countdown == 0) {
685  resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
686  crm_warn("Forcing %s away from %s after %d failures (max=%d)",
687  failed->id, node->details->uname, fail_count,
688  rsc->migration_threshold);
689  } else {
690  crm_info("%s can fail %d more times on %s before being forced off",
691  failed->id, countdown, node->details->uname);
692  }
693 }
694 
695 static void
696 common_apply_stickiness(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
697 {
698  if (rsc->children) {
699  GListPtr gIter = rsc->children;
700 
701  for (; gIter != NULL; gIter = gIter->next) {
702  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
703 
704  common_apply_stickiness(child_rsc, node, data_set);
705  }
706  return;
707  }
708 
709  if (is_set(rsc->flags, pe_rsc_managed)
710  && rsc->stickiness != 0 && pcmk__list_of_1(rsc->running_on)) {
711  pe_node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
712  pe_node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
713 
714  if (current == NULL) {
715 
716  } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
717  pe_resource_t *sticky_rsc = rsc;
718 
719  resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
720  pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
721  " (node=%s, weight=%d)", sticky_rsc->id,
722  node->details->uname, rsc->stickiness);
723  } else {
724  GHashTableIter iter;
725  pe_node_t *nIter = NULL;
726 
727  pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
728  " and node %s is not explicitly allowed", rsc->id, node->details->uname);
729  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
730  while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
731  crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
732  }
733  }
734  }
735 
736  /* Check the migration threshold only if a failcount clear action
737  * has not already been placed for this resource on the node.
738  * There is no sense in potentially forcing the resource from this
739  * node if the failcount is being reset anyway.
740  *
741  * @TODO A clear_failcount operation can be scheduled in stage4() via
742  * check_actions_for(), or in stage5() via check_params(). This runs in
743  * stage2(), so it cannot detect those, meaning we might check the migration
744  * threshold when we shouldn't -- worst case, we stop or move the resource,
745  * then move it back next transition.
746  */
747  if (failcount_clear_action_exists(node, rsc) == FALSE) {
748  check_migration_threshold(rsc, node, data_set);
749  }
750 }
751 
752 void
754 {
755  GListPtr gIter = rsc->children;
756 
757  rsc->cmds = &resource_class_alloc_functions[rsc->variant];
758 
759  for (; gIter != NULL; gIter = gIter->next) {
760  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
761 
762  complex_set_cmds(child_rsc);
763  }
764 }
765 
766 void
768 {
769 
770  GListPtr gIter = data_set->resources;
771 
772  for (; gIter != NULL; gIter = gIter->next) {
773  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
774 
775  complex_set_cmds(rsc);
776  }
777 }
778 
779 static void
780 calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
781 {
782  const char *key = (const char *)gKey;
783  const char *value = (const char *)gValue;
784  int *system_health = (int *)user_data;
785 
786  if (!gKey || !gValue || !user_data) {
787  return;
788  }
789 
790  if (pcmk__starts_with(key, "#health")) {
791  int score;
792 
793  /* Convert the value into an integer */
794  score = char2score(value);
795 
796  /* Add it to the running total */
797  *system_health = pe__add_scores(score, *system_health);
798  }
799 }
800 
801 static gboolean
802 apply_system_health(pe_working_set_t * data_set)
803 {
804  GListPtr gIter = NULL;
805  const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
806  int base_health = 0;
807 
808  if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
809  /* Prevent any accidental health -> score translation */
810  pcmk__score_red = 0;
811  pcmk__score_yellow = 0;
812  pcmk__score_green = 0;
813  return TRUE;
814 
815  } else if (safe_str_eq(health_strategy, "migrate-on-red")) {
816 
817  /* Resources on nodes which have health values of red are
818  * weighted away from that node.
819  */
821  pcmk__score_yellow = 0;
822  pcmk__score_green = 0;
823 
824  } else if (safe_str_eq(health_strategy, "only-green")) {
825 
826  /* Resources on nodes which have health values of red or yellow
827  * are forced away from that node.
828  */
831  pcmk__score_green = 0;
832 
833  } else if (safe_str_eq(health_strategy, "progressive")) {
834  /* Same as the above, but use the r/y/g scores provided by the user
835  * Defaults are provided by the pe_prefs table
836  * Also, custom health "base score" can be used
837  */
838  base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");
839 
840  } else if (safe_str_eq(health_strategy, "custom")) {
841 
842  /* Requires the admin to configure the rsc_location constaints for
843  * processing the stored health scores
844  */
845  /* TODO: Check for the existence of appropriate node health constraints */
846  return TRUE;
847 
848  } else {
849  crm_err("Unknown node health strategy: %s", health_strategy);
850  return FALSE;
851  }
852 
853  crm_info("Applying automated node health strategy: %s", health_strategy);
854 
855  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
856  int system_health = base_health;
857  pe_node_t *node = (pe_node_t *) gIter->data;
858 
859  /* Search through the node hash table for system health entries. */
860  g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
861 
862  crm_info(" Node %s has an combined system health of %d",
863  node->details->uname, system_health);
864 
865  /* If the health is non-zero, then create a new rsc2node so that the
866  * weight will be added later on.
867  */
868  if (system_health != 0) {
869 
870  GListPtr gIter2 = data_set->resources;
871 
872  for (; gIter2 != NULL; gIter2 = gIter2->next) {
873  pe_resource_t *rsc = (pe_resource_t *) gIter2->data;
874 
875  rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
876  }
877  }
878  }
879 
880  return TRUE;
881 }
882 
883 gboolean
885 {
886  xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
887 
888  if (data_set->input == NULL) {
889  return FALSE;
890  }
891 
892  if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
893  crm_trace("Calculating status");
894  cluster_status(data_set);
895  }
896 
897  set_alloc_actions(data_set);
898  apply_system_health(data_set);
899  unpack_constraints(cib_constraints, data_set);
900 
901  return TRUE;
902 }
903 
904 /*
905  * Check nodes for resources started outside of the LRM
906  */
907 gboolean
909 {
910  pe_action_t *probe_node_complete = NULL;
911 
912  for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
913  pe_node_t *node = (pe_node_t *) gIter->data;
914  const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
915 
916  if (node->details->online == FALSE) {
917 
918  if (pe__is_remote_node(node) && node->details->remote_rsc
919  && (get_remote_node_state(node) == remote_state_failed)) {
920 
921  pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE);
922  }
923  continue;
924 
925  } else if (node->details->unclean) {
926  continue;
927 
928  } else if (node->details->rsc_discovery_enabled == FALSE) {
929  /* resource discovery is disabled for this node */
930  continue;
931  }
932 
933  if (probed != NULL && crm_is_true(probed) == FALSE) {
934  pe_action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
935  CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
936 
938  continue;
939  }
940 
941  for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
942  pe_resource_t *rsc = (pe_resource_t *) gIter2->data;
943 
944  rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
945  }
946  }
947  return TRUE;
948 }
949 
950 static void
951 rsc_discover_filter(pe_resource_t *rsc, pe_node_t *node)
952 {
953  GListPtr gIter = rsc->children;
954  pe_resource_t *top = uber_parent(rsc);
955  pe_node_t *match;
956 
957  if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
958  return;
959  }
960 
961  for (; gIter != NULL; gIter = gIter->next) {
962  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
963  rsc_discover_filter(child_rsc, node);
964  }
965 
966  match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
967  if (match && match->rsc_discover_mode != pe_discover_exclusive) {
968  match->weight = -INFINITY;
969  }
970 }
971 
972 static time_t
973 shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
974 {
975  const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
976  time_t result = 0;
977 
978  if (shutdown) {
979  errno = 0;
980  result = (time_t) crm_parse_ll(shutdown, NULL);
981  if (errno != 0) {
982  result = 0;
983  }
984  }
985  return result? result : get_effective_time(data_set);
986 }
987 
988 static void
989 apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
990 {
991  const char *class;
992 
993  // Only primitives and (uncloned) groups may be locked
994  if (rsc->variant == pe_group) {
995  for (GList *item = rsc->children; item != NULL;
996  item = item->next) {
997  apply_shutdown_lock((pe_resource_t *) item->data, data_set);
998  }
999  } else if (rsc->variant != pe_native) {
1000  return;
1001  }
1002 
1003  // Fence devices and remote connections can't be locked
1004  class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
1005  if ((class == NULL) || !strcmp(class, PCMK_RESOURCE_CLASS_STONITH)
1006  || pe__resource_is_remote_conn(rsc, data_set)) {
1007  return;
1008  }
1009 
1010  if (rsc->lock_node != NULL) {
1011  // The lock was obtained from resource history
1012 
1013  if (rsc->running_on != NULL) {
1014  /* The resource was started elsewhere even though it is now
1015  * considered locked. This shouldn't be possible, but as a
1016  * failsafe, we don't want to disturb the resource now.
1017  */
1018  pe_rsc_info(rsc,
1019  "Cancelling shutdown lock because %s is already active",
1020  rsc->id);
1021  pe__clear_resource_history(rsc, rsc->lock_node, data_set);
1022  rsc->lock_node = NULL;
1023  rsc->lock_time = 0;
1024  }
1025 
1026  // Only a resource active on exactly one node can be locked
1027  } else if (pcmk__list_of_1(rsc->running_on)) {
1028  pe_node_t *node = rsc->running_on->data;
1029 
1030  if (node->details->shutdown) {
1031  if (node->details->unclean) {
1032  pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
1033  rsc->id, node->details->uname);
1034  } else {
1035  rsc->lock_node = node;
1036  rsc->lock_time = shutdown_time(node, data_set);
1037  }
1038  }
1039  }
1040 
1041  if (rsc->lock_node == NULL) {
1042  // No lock needed
1043  return;
1044  }
1045 
1046  if (data_set->shutdown_lock > 0) {
1047  time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock;
1048 
1049  pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
1050  rsc->id, rsc->lock_node->details->uname,
1051  (long long) lock_expiration);
1052  pe__update_recheck_time(++lock_expiration, data_set);
1053  } else {
1054  pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
1055  rsc->id, rsc->lock_node->details->uname);
1056  }
1057 
1058  // If resource is locked to one node, ban it from all other nodes
1059  for (GList *item = data_set->nodes; item != NULL; item = item->next) {
1060  pe_node_t *node = item->data;
1061 
1062  if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
1064  XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set);
1065  }
1066  }
1067 }
1068 
1069 /*
1070  * \internal
1071  * \brief Stage 2 of cluster status: apply node-specific criteria
1072  *
1073  * Count known nodes, and apply location constraints, stickiness, and exclusive
1074  * resource discovery.
1075  */
1076 gboolean
1078 {
1079  GListPtr gIter = NULL;
1080 
1081  if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
1082  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1083  apply_shutdown_lock((pe_resource_t *) gIter->data, data_set);
1084  }
1085  }
1086 
1087  if (is_not_set(data_set->flags, pe_flag_no_compat)) {
1088  // @COMPAT API backward compatibility
1089  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1090  pe_node_t *node = (pe_node_t *) gIter->data;
1091 
1092  if (node && (node->weight >= 0) && node->details->online
1093  && (node->details->type != node_ping)) {
1094  data_set->max_valid_nodes++;
1095  }
1096  }
1097  }
1098 
1099  crm_trace("Applying placement constraints");
1100  apply_placement_constraints(data_set);
1101 
1102  gIter = data_set->nodes;
1103  for (; gIter != NULL; gIter = gIter->next) {
1104  GListPtr gIter2 = NULL;
1105  pe_node_t *node = (pe_node_t *) gIter->data;
1106 
1107  gIter2 = data_set->resources;
1108  for (; gIter2 != NULL; gIter2 = gIter2->next) {
1109  pe_resource_t *rsc = (pe_resource_t *) gIter2->data;
1110 
1111  common_apply_stickiness(rsc, node, data_set);
1112  rsc_discover_filter(rsc, node);
1113  }
1114  }
1115 
1116  return TRUE;
1117 }
1118 
1119 /*
1120  * Create internal resource constraints before allocation
1121  */
1122 gboolean
1124 {
1125 
1126  GListPtr gIter = data_set->resources;
1127 
1128  for (; gIter != NULL; gIter = gIter->next) {
1129  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1130 
1131  rsc->cmds->internal_constraints(rsc, data_set);
1132  }
1133 
1134  return TRUE;
1135 }
1136 
1137 /*
1138  * Check for orphaned or redefined actions
1139  */
1140 gboolean
1142 {
1143  check_actions(data_set);
1144  return TRUE;
1145 }
1146 
static void *
convert_const_pointer(const void *ptr)
{
    /* Strip the const qualifier so legacy APIs that take a non-const
     * pointer (but do not modify the object) can be called.
     * ("Worst function ever", as the original author put it.)
     */
    void *mutable_ptr = (void *) ptr;

    return mutable_ptr;
}
1153 
1154 static gint
1155 sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
1156 {
1157  int rc = 0;
1158  int r1_weight = -INFINITY;
1159  int r2_weight = -INFINITY;
1160 
1161  const char *reason = "existence";
1162 
1163  const GListPtr nodes = (GListPtr) data;
1164  const pe_resource_t *resource1 = a;
1165  const pe_resource_t *resource2 = b;
1166 
1167  pe_node_t *r1_node = NULL;
1168  pe_node_t *r2_node = NULL;
1169  GListPtr gIter = NULL;
1170  GHashTable *r1_nodes = NULL;
1171  GHashTable *r2_nodes = NULL;
1172 
1173  reason = "priority";
1174  r1_weight = resource1->priority;
1175  r2_weight = resource2->priority;
1176 
1177  if (r1_weight > r2_weight) {
1178  rc = -1;
1179  goto done;
1180  }
1181 
1182  if (r1_weight < r2_weight) {
1183  rc = 1;
1184  goto done;
1185  }
1186 
1187  reason = "no node list";
1188  if (nodes == NULL) {
1189  goto done;
1190  }
1191 
1192  r1_nodes = pcmk__native_merge_weights(convert_const_pointer(resource1),
1193  resource1->id, NULL, NULL, 1,
1195  pe__show_node_weights(true, NULL, resource1->id, r1_nodes);
1196 
1197  r2_nodes = pcmk__native_merge_weights(convert_const_pointer(resource2),
1198  resource2->id, NULL, NULL, 1,
1200  pe__show_node_weights(true, NULL, resource2->id, r2_nodes);
1201 
1202  /* Current location score */
1203  reason = "current location";
1204  r1_weight = -INFINITY;
1205  r2_weight = -INFINITY;
1206 
1207  if (resource1->running_on) {
1208  r1_node = pe__current_node(resource1);
1209  r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
1210  if (r1_node != NULL) {
1211  r1_weight = r1_node->weight;
1212  }
1213  }
1214  if (resource2->running_on) {
1215  r2_node = pe__current_node(resource2);
1216  r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
1217  if (r2_node != NULL) {
1218  r2_weight = r2_node->weight;
1219  }
1220  }
1221 
1222  if (r1_weight > r2_weight) {
1223  rc = -1;
1224  goto done;
1225  }
1226 
1227  if (r1_weight < r2_weight) {
1228  rc = 1;
1229  goto done;
1230  }
1231 
1232  reason = "score";
1233  for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
1234  pe_node_t *node = (pe_node_t *) gIter->data;
1235 
1236  r1_node = NULL;
1237  r2_node = NULL;
1238 
1239  r1_weight = -INFINITY;
1240  if (r1_nodes) {
1241  r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
1242  }
1243  if (r1_node) {
1244  r1_weight = r1_node->weight;
1245  }
1246 
1247  r2_weight = -INFINITY;
1248  if (r2_nodes) {
1249  r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
1250  }
1251  if (r2_node) {
1252  r2_weight = r2_node->weight;
1253  }
1254 
1255  if (r1_weight > r2_weight) {
1256  rc = -1;
1257  goto done;
1258  }
1259 
1260  if (r1_weight < r2_weight) {
1261  rc = 1;
1262  goto done;
1263  }
1264  }
1265 
1266  done:
1267  crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
1268  resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
1269  rc < 0 ? '>' : rc > 0 ? '<' : '=',
1270  resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
1271 
1272  if (r1_nodes) {
1273  g_hash_table_destroy(r1_nodes);
1274  }
1275  if (r2_nodes) {
1276  g_hash_table_destroy(r2_nodes);
1277  }
1278 
1279  return rc;
1280 }
1281 
1282 static void
1283 allocate_resources(pe_working_set_t * data_set)
1284 {
1285  GListPtr gIter = NULL;
1286 
1287  if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
1288  /* Allocate remote connection resources first (which will also allocate
1289  * any colocation dependencies). If the connection is migrating, always
1290  * prefer the partial migration target.
1291  */
1292  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1293  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1294  if (rsc->is_remote_node == FALSE) {
1295  continue;
1296  }
1297  pe_rsc_trace(rsc, "Allocating remote connection resource '%s'",
1298  rsc->id);
1299  rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
1300  }
1301  }
1302 
1303  /* now do the rest of the resources */
1304  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1305  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1306  if (rsc->is_remote_node == TRUE) {
1307  continue;
1308  }
1309  pe_rsc_trace(rsc, "Allocating %s resource '%s'",
1310  crm_element_name(rsc->xml), rsc->id);
1311  rsc->cmds->allocate(rsc, NULL, data_set);
1312  }
1313 }
1314 
1315 /* We always use pe_order_preserve with these convenience functions to exempt
1316  * internally generated constraints from the prohibition of user constraints
1317  * involving remote connection resources.
1318  *
1319  * The start ordering additionally uses pe_order_runnable_left so that the
1320  * specified action is not runnable if the start is not runnable.
1321  */
1322 
1323 static inline void
1324 order_start_then_action(pe_resource_t *lh_rsc, pe_action_t *rh_action,
1325  enum pe_ordering extra, pe_working_set_t *data_set)
1326 {
1327  if (lh_rsc && rh_action && data_set) {
1328  custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
1329  rh_action->rsc, NULL, rh_action,
1330  pe_order_preserve | pe_order_runnable_left | extra,
1331  data_set);
1332  }
1333 }
1334 
1335 static inline void
1336 order_action_then_stop(pe_action_t *lh_action, pe_resource_t *rh_rsc,
1337  enum pe_ordering extra, pe_working_set_t *data_set)
1338 {
1339  if (lh_action && rh_rsc && data_set) {
1340  custom_action_order(lh_action->rsc, NULL, lh_action,
1341  rh_rsc, stop_key(rh_rsc), NULL,
1342  pe_order_preserve | extra, data_set);
1343  }
1344 }
1345 
1346 // Clear fail counts for orphaned rsc on all online nodes
1347 static void
1348 cleanup_orphans(pe_resource_t * rsc, pe_working_set_t * data_set)
1349 {
1350  GListPtr gIter = NULL;
1351 
1352  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1353  pe_node_t *node = (pe_node_t *) gIter->data;
1354 
1355  if (node->details->online
1356  && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL,
1357  data_set)) {
1358 
1359  pe_action_t *clear_op = NULL;
1360 
1361  clear_op = pe__clear_failcount(rsc, node, "it is orphaned",
1362  data_set);
1363 
1364  /* We can't use order_action_then_stop() here because its
1365  * pe_order_preserve breaks things
1366  */
1367  custom_action_order(clear_op->rsc, NULL, clear_op,
1368  rsc, stop_key(rsc), NULL,
1369  pe_order_optional, data_set);
1370  }
1371  }
1372 }
1373 
1374 gboolean
1376 {
1377  GListPtr gIter = NULL;
1378  int log_prio = show_utilization? LOG_STDOUT : LOG_TRACE;
1379 
1380  if (safe_str_neq(data_set->placement_strategy, "default")) {
1381  GListPtr nodes = g_list_copy(data_set->nodes);
1382 
1383  nodes = sort_nodes_by_weight(nodes, NULL, data_set);
1384  data_set->resources =
1385  g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
1386 
1387  g_list_free(nodes);
1388  }
1389 
1390  gIter = data_set->nodes;
1391  for (; gIter != NULL; gIter = gIter->next) {
1392  pe_node_t *node = (pe_node_t *) gIter->data;
1393 
1394  dump_node_capacity(log_prio, "Original", node);
1395  }
1396 
1397  crm_trace("Allocating services");
1398  /* Take (next) highest resource, assign it and create its actions */
1399 
1400  allocate_resources(data_set);
1401 
1402  gIter = data_set->nodes;
1403  for (; gIter != NULL; gIter = gIter->next) {
1404  pe_node_t *node = (pe_node_t *) gIter->data;
1405 
1406  dump_node_capacity(log_prio, "Remaining", node);
1407  }
1408 
1409  // Process deferred action checks
1410  pe__foreach_param_check(data_set, check_params);
1411  pe__free_param_checks(data_set);
1412 
1413  if (is_set(data_set->flags, pe_flag_startup_probes)) {
1414  crm_trace("Calculating needed probes");
1415  /* This code probably needs optimization
1416  * ptest -x with 100 nodes, 100 clones and clone-max=100:
1417 
1418  With probes:
1419 
1420  ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
1421  ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
1422  ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
1423  ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
1424  ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
1425  ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
1426  ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
1427  ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
1428  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
1429  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
1430  ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
1431  36s
1432  ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
1433 
1434  Without probes:
1435 
1436  ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
1437  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
1438  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
1439  ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
1440  ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
1441  ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
1442  ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
1443  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
1444  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
1445  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
1446  ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
1447  */
1448 
1449  probe_resources(data_set);
1450  }
1451 
1452  crm_trace("Handle orphans");
1453  if (is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
1454  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1455  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1456 
1457  /* There's no need to recurse into rsc->children because those
1458  * should just be unallocated clone instances.
1459  */
1460  if (is_set(rsc->flags, pe_rsc_orphan)) {
1461  cleanup_orphans(rsc, data_set);
1462  }
1463  }
1464  }
1465 
1466  crm_trace("Creating actions");
1467 
1468  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
1469  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1470 
1471  rsc->cmds->create_actions(rsc, data_set);
1472  }
1473 
1474  crm_trace("Creating done");
1475  return TRUE;
1476 }
1477 
1478 static gboolean
1479 is_managed(const pe_resource_t * rsc)
1480 {
1481  GListPtr gIter = rsc->children;
1482 
1483  if (is_set(rsc->flags, pe_rsc_managed)) {
1484  return TRUE;
1485  }
1486 
1487  for (; gIter != NULL; gIter = gIter->next) {
1488  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
1489 
1490  if (is_managed(child_rsc)) {
1491  return TRUE;
1492  }
1493  }
1494 
1495  return FALSE;
1496 }
1497 
1498 static gboolean
1499 any_managed_resources(pe_working_set_t * data_set)
1500 {
1501 
1502  GListPtr gIter = data_set->resources;
1503 
1504  for (; gIter != NULL; gIter = gIter->next) {
1505  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
1506 
1507  if (is_managed(rsc)) {
1508  return TRUE;
1509  }
1510  }
1511  return FALSE;
1512 }
1513 
1521 static void
1522 fence_guest(pe_node_t *node, pe_working_set_t *data_set)
1523 {
1524  pe_resource_t *container = node->details->remote_rsc->container;
1525  pe_action_t *stop = NULL;
1526  pe_action_t *stonith_op = NULL;
1527 
1528  /* The fence action is just a label; we don't do anything differently for
1529  * off vs. reboot. We specify it explicitly, rather than let it default to
1530  * cluster's default action, because we are not _initiating_ fencing -- we
1531  * are creating a pseudo-event to describe fencing that is already occurring
1532  * by other means (container recovery).
1533  */
1534  const char *fence_action = "off";
1535 
1536  /* Check whether guest's container resource has any explicit stop or
1537  * start (the stop may be implied by fencing of the guest's host).
1538  */
1539  if (container) {
1540  stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
1541 
1542  if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
1543  fence_action = "reboot";
1544  }
1545  }
1546 
1547  /* Create a fence pseudo-event, so we have an event to order actions
1548  * against, and the controller can always detect it.
1549  */
1550  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set);
1552  __FUNCTION__, __LINE__);
1553 
1554  /* We want to imply stops/demotes after the guest is stopped, not wait until
1555  * it is restarted, so we always order pseudo-fencing after stop, not start
1556  * (even though start might be closer to what is done for a real reboot).
1557  */
1558  if(stop && is_set(stop->flags, pe_action_pseudo)) {
1559  pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set);
1560  crm_info("Implying guest node %s is down (action %d) after %s fencing",
1561  node->details->uname, stonith_op->id, stop->node->details->uname);
1562  order_actions(parent_stonith_op, stonith_op,
1564 
1565  } else if (stop) {
1566  order_actions(stop, stonith_op,
1568  crm_info("Implying guest node %s is down (action %d) "
1569  "after container %s is stopped (action %d)",
1570  node->details->uname, stonith_op->id,
1571  container->id, stop->id);
1572  } else {
1573  /* If we're fencing the guest node but there's no stop for the guest
1574  * resource, we must think the guest is already stopped. However, we may
1575  * think so because its resource history was just cleaned. To avoid
1576  * unnecessarily considering the guest node down if it's really up,
1577  * order the pseudo-fencing after any stop of the connection resource,
1578  * which will be ordered after any container (re-)probe.
1579  */
1580  stop = find_first_action(node->details->remote_rsc->actions, NULL,
1581  RSC_STOP, NULL);
1582 
1583  if (stop) {
1584  order_actions(stop, stonith_op, pe_order_optional);
1585  crm_info("Implying guest node %s is down (action %d) "
1586  "after connection is stopped (action %d)",
1587  node->details->uname, stonith_op->id, stop->id);
1588  } else {
1589  /* Not sure why we're fencing, but everything must already be
1590  * cleanly stopped.
1591  */
1592  crm_info("Implying guest node %s is down (action %d) ",
1593  node->details->uname, stonith_op->id);
1594  }
1595  }
1596 
1597  /* Order/imply other actions relative to pseudo-fence as with real fence */
1598  pcmk__order_vs_fence(stonith_op, data_set);
1599 }
1600 
1601 /*
1602  * Create dependencies for stonith and shutdown operations
1603  */
1604 gboolean
1606 {
1607  pe_action_t *dc_down = NULL;
1608  pe_action_t *stonith_op = NULL;
1609  gboolean integrity_lost = FALSE;
1610  gboolean need_stonith = TRUE;
1611  GListPtr gIter;
1612  GListPtr stonith_ops = NULL;
1613  GList *shutdown_ops = NULL;
1614 
1615  /* Remote ordering constraints need to happen prior to calculating fencing
1616  * because it is one more place we will mark the node as dirty.
1617  *
1618  * A nice side effect of doing them early is that apply_*_ordering() can be
1619  * simpler because pe_fence_node() has already done some of the work.
1620  */
1621  crm_trace("Creating remote ordering constraints");
1622  apply_remote_node_ordering(data_set);
1623 
1624  crm_trace("Processing fencing and shutdown cases");
1625  if (any_managed_resources(data_set) == FALSE) {
1626  crm_notice("Delaying fencing operations until there are resources to manage");
1627  need_stonith = FALSE;
1628  }
1629 
1630  /* Check each node for stonith/shutdown */
1631  for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1632  pe_node_t *node = (pe_node_t *) gIter->data;
1633 
1634  /* Guest nodes are "fenced" by recovering their container resource,
1635  * so handle them separately.
1636  */
1637  if (pe__is_guest_node(node)) {
1638  if (node->details->remote_requires_reset && need_stonith
1639  && pe_can_fence(data_set, node)) {
1640  fence_guest(node, data_set);
1641  }
1642  continue;
1643  }
1644 
1645  stonith_op = NULL;
1646 
1647  if (node->details->unclean
1648  && need_stonith && pe_can_fence(data_set, node)) {
1649 
1650  stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set);
1651  pe_warn("Scheduling Node %s for STONITH", node->details->uname);
1652 
1653  pcmk__order_vs_fence(stonith_op, data_set);
1654 
1655  if (node->details->is_dc) {
1656  // Remember if the DC is being fenced
1657  dc_down = stonith_op;
1658 
1659  } else {
1660 
1661  if (is_not_set(data_set->flags, pe_flag_concurrent_fencing)
1662  && (stonith_ops != NULL)) {
1663  /* Concurrent fencing is disabled, so order each non-DC
1664  * fencing in a chain. If there is any DC fencing or
1665  * shutdown, it will be ordered after the last action in the
1666  * chain later.
1667  */
1668  order_actions((pe_action_t *) stonith_ops->data,
1669  stonith_op, pe_order_optional);
1670  }
1671 
1672  // Remember all non-DC fencing actions in a separate list
1673  stonith_ops = g_list_prepend(stonith_ops, stonith_op);
1674  }
1675 
1676  } else if (node->details->online && node->details->shutdown &&
1677  /* TODO define what a shutdown op means for a remote node.
1678  * For now we do not send shutdown operations for remote nodes, but
1679  * if we can come up with a good use for this in the future, we will. */
1680  pe__is_guest_or_remote_node(node) == FALSE) {
1681 
1682  pe_action_t *down_op = sched_shutdown_op(node, data_set);
1683 
1684  if (node->details->is_dc) {
1685  // Remember if the DC is being shut down
1686  dc_down = down_op;
1687  } else {
1688  // Remember non-DC shutdowns for later ordering
1689  shutdown_ops = g_list_prepend(shutdown_ops, down_op);
1690  }
1691  }
1692 
1693  if (node->details->unclean && stonith_op == NULL) {
1694  integrity_lost = TRUE;
1695  pe_warn("Node %s is unclean!", node->details->uname);
1696  }
1697  }
1698 
1699  if (integrity_lost) {
1700  if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1701  pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
1702  pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
1703 
1704  } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
1705  crm_notice("Cannot fence unclean nodes until quorum is"
1706  " attained (or no-quorum-policy is set to ignore)");
1707  }
1708  }
1709 
1710  if (dc_down != NULL) {
1711  /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated
1712  * DC elections. However, we don't want to order non-DC shutdowns before
1713  * a DC *fencing*, because even though we don't want a node that's
1714  * shutting down to become DC, the DC fencing could be ordered before a
1715  * clone stop that's also ordered before the shutdowns, thus leading to
1716  * a graph loop.
1717  */
1718  if (safe_str_eq(dc_down->task, CRM_OP_SHUTDOWN)) {
1719  for (gIter = shutdown_ops; gIter != NULL; gIter = gIter->next) {
1720  pe_action_t *node_stop = (pe_action_t *) gIter->data;
1721 
1722  crm_debug("Ordering shutdown on %s before %s on DC %s",
1723  node_stop->node->details->uname,
1724  dc_down->task, dc_down->node->details->uname);
1725 
1726  order_actions(node_stop, dc_down, pe_order_optional);
1727  }
1728  }
1729 
1730  // Order any non-DC fencing before any DC fencing or shutdown
1731 
1732  if (is_set(data_set->flags, pe_flag_concurrent_fencing)) {
1733  /* With concurrent fencing, order each non-DC fencing action
1734  * separately before any DC fencing or shutdown.
1735  */
1736  for (gIter = stonith_ops; gIter != NULL; gIter = gIter->next) {
1737  order_actions((pe_action_t *) gIter->data, dc_down,
1739  }
1740  } else if (stonith_ops) {
1741  /* Without concurrent fencing, the non-DC fencing actions are
1742  * already ordered relative to each other, so we just need to order
1743  * the DC fencing after the last action in the chain (which is the
1744  * first item in the list).
1745  */
1746  order_actions((pe_action_t *) stonith_ops->data, dc_down,
1748  }
1749  }
1750  g_list_free(stonith_ops);
1751  g_list_free(shutdown_ops);
1752  return TRUE;
1753 }
1754 
1755 /*
1756  * Determine the sets of independent actions and the correct order for the
1757  * actions in each set.
1758  *
1759  * Mark dependencies of un-runnable actions un-runnable
1760  *
1761  */
1762 static GListPtr
1763 find_actions_by_task(GListPtr actions, pe_resource_t * rsc, const char *original_key)
1764 {
1765  GListPtr list = NULL;
1766 
1767  list = find_actions(actions, original_key, NULL);
1768  if (list == NULL) {
1769  /* we're potentially searching a child of the original resource */
1770  char *key = NULL;
1771  char *task = NULL;
1772  guint interval_ms = 0;
1773 
1774  if (parse_op_key(original_key, NULL, &task, &interval_ms)) {
1775  key = pcmk__op_key(rsc->id, task, interval_ms);
1776  list = find_actions(actions, key, NULL);
1777 
1778  } else {
1779  crm_err("search key: %s", original_key);
1780  }
1781 
1782  free(key);
1783  free(task);
1784  }
1785 
1786  return list;
1787 }
1788 
1789 static void
1790 rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc,
1791  pe__ordering_t *order)
1792 {
1793  GListPtr gIter = NULL;
1794  GListPtr rh_actions = NULL;
1795  pe_action_t *rh_action = NULL;
1796  enum pe_ordering type;
1797 
1798  CRM_CHECK(rsc != NULL, return);
1799  CRM_CHECK(order != NULL, return);
1800 
1801  type = order->type;
1802  rh_action = order->rh_action;
1803  crm_trace("Processing RH of ordering constraint %d", order->id);
1804 
1805  if (rh_action != NULL) {
1806  rh_actions = g_list_prepend(NULL, rh_action);
1807 
1808  } else if (rsc != NULL) {
1809  rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
1810  }
1811 
1812  if (rh_actions == NULL) {
1813  pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
1814  " ignoring", rsc->id, order->rh_action_task);
1815  if (lh_action) {
1816  pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
1817  }
1818  return;
1819  }
1820 
1821  if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
1822  pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
1823  order->rh_action_task);
1825  }
1826 
1827  gIter = rh_actions;
1828  for (; gIter != NULL; gIter = gIter->next) {
1829  pe_action_t *rh_action_iter = (pe_action_t *) gIter->data;
1830 
1831  if (lh_action) {
1832  order_actions(lh_action, rh_action_iter, type);
1833 
1834  } else if (type & pe_order_implies_then) {
1835  update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
1836  crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
1837  } else {
1838  crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
1839  }
1840  }
1841 
1842  g_list_free(rh_actions);
1843 }
1844 
/* Apply the left-hand side of an ordering constraint: find (or create) the
 * LH action(s) for lh_rsc, then order each one relative to the constraint's
 * right-hand side via rsc_order_then() or order_actions().
 */
static void
rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order,
                pe_working_set_t *data_set)
{
    GListPtr gIter = NULL;
    GListPtr lh_actions = NULL;
    pe_action_t *lh_action = order->lh_action;
    pe_resource_t *rh_rsc = order->rh_rsc;

    crm_trace("Processing LH of ordering constraint %d", order->id);
    CRM_ASSERT(lh_rsc != NULL);

    if (lh_action != NULL) {
        // The constraint names a concrete action; use it directly
        lh_actions = g_list_prepend(NULL, lh_action);

    } else {
        // Otherwise search this resource's actions by task key
        lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
    }

    /* No matching LH action exists yet; decide whether to ignore the
     * constraint (redundant stop/demote) or create the action on demand.
     */
    if (lh_actions == NULL && lh_rsc != rh_rsc) {
        char *key = NULL;
        char *op_type = NULL;
        guint interval_ms = 0;

        parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms);
        key = pcmk__op_key(lh_rsc->id, op_type, interval_ms);

        // A stop of an already-stopped resource is pointless; skip it
        if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
            free(key);
            pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                         lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);

        // Likewise a demote of a resource already in the slave role
        } else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
            free(key);
            pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
                         lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);

        } else {
            /* Create the missing LH action; custom_action() takes ownership
             * of key, so it is not freed here.
             */
            pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
                         lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
            lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
            lh_actions = g_list_prepend(NULL, lh_action);
        }

        free(op_type);
    }

    // Order each LH action against the constraint's right-hand side
    gIter = lh_actions;
    for (; gIter != NULL; gIter = gIter->next) {
        pe_action_t *lh_action_iter = (pe_action_t *) gIter->data;

        if (rh_rsc == NULL && order->rh_action) {
            rh_rsc = order->rh_action->rsc;
        }
        if (rh_rsc) {
            rsc_order_then(lh_action_iter, rh_rsc, order);

        } else if (order->rh_action) {
            order_actions(lh_action_iter, order->rh_action, order->type);
        }
    }

    g_list_free(lh_actions);
}
1909 
1911  pe_working_set_t *data_set);
1912 
1913 static int
1914 is_recurring_action(pe_action_t *action)
1915 {
1916  guint interval_ms;
1917 
1918  if (pcmk__guint_from_hash(action->meta,
1920  &interval_ms) != pcmk_rc_ok) {
1921  return 0;
1922  }
1923  return (interval_ms > 0);
1924 }
1925 
1926 static void
1927 apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set)
1928 {
1929  /* VMs are also classified as containers for these purposes... in
1930  * that they both involve a 'thing' running on a real or remote
1931  * cluster node.
1932  *
1933  * This allows us to be smarter about the type and extent of
1934  * recovery actions required in various scenarios
1935  */
1936  pe_resource_t *remote_rsc = NULL;
1937  pe_resource_t *container = NULL;
1938  enum action_tasks task = text2task(action->task);
1939 
1940  CRM_ASSERT(action->rsc);
1941  CRM_ASSERT(action->node);
1943 
1944  remote_rsc = action->node->details->remote_rsc;
1945  CRM_ASSERT(remote_rsc);
1946 
1947  container = remote_rsc->container;
1948  CRM_ASSERT(container);
1949 
1950  if(is_set(container->flags, pe_rsc_failed)) {
1951  pe_fence_node(data_set, action->node, "container failed", FALSE);
1952  }
1953 
1954  crm_trace("Order %s action %s relative to %s%s for %s%s",
1955  action->task, action->uuid,
1956  is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
1957  remote_rsc->id,
1958  is_set(container->flags, pe_rsc_failed)? "failed " : "",
1959  container->id);
1960 
1961  if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
1962  || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
1963  /* Migration ops map to "no_action", but we need to apply the same
1964  * ordering as for stop or demote (see get_router_node()).
1965  */
1966  task = stop_rsc;
1967  }
1968 
1969  switch (task) {
1970  case start_rsc:
1971  case action_promote:
1972  /* Force resource recovery if the container is recovered */
1973  order_start_then_action(container, action, pe_order_implies_then,
1974  data_set);
1975 
1976  /* Wait for the connection resource to be up too */
1977  order_start_then_action(remote_rsc, action, pe_order_none,
1978  data_set);
1979  break;
1980 
1981  case stop_rsc:
1982  case action_demote:
1983  if (is_set(container->flags, pe_rsc_failed)) {
1984  /* When the container representing a guest node fails, any stop
1985  * or demote actions for resources running on the guest node
1986  * are implied by the container stopping. This is similar to
1987  * how fencing operations work for cluster nodes and remote
1988  * nodes.
1989  */
1990  } else {
1991  /* Ensure the operation happens before the connection is brought
1992  * down.
1993  *
1994  * If we really wanted to, we could order these after the
1995  * connection start, IFF the container's current role was
1996  * stopped (otherwise we re-introduce an ordering loop when the
1997  * connection is restarting).
1998  */
1999  order_action_then_stop(action, remote_rsc, pe_order_none,
2000  data_set);
2001  }
2002  break;
2003 
2004  default:
2005  /* Wait for the connection resource to be up */
2006  if (is_recurring_action(action)) {
2007  /* In case we ever get the recovery logic wrong, force
2008  * recurring monitors to be restarted, even if just
2009  * the connection was re-established
2010  */
2011  if(task != no_action) {
2012  order_start_then_action(remote_rsc, action,
2013  pe_order_implies_then, data_set);
2014  }
2015  } else {
2016  order_start_then_action(remote_rsc, action, pe_order_none,
2017  data_set);
2018  }
2019  break;
2020  }
2021 }
2022 
/* Determine the scheduling-relevant state of a remote node's connection,
 * based on the connection resource's next role and allocation, and the
 * health of the cluster node currently hosting it.
 */
static enum remote_connection_state
get_remote_node_state(pe_node_t *node)
{
    pe_resource_t *remote_rsc = NULL;
    pe_node_t *cluster_node = NULL;

    CRM_ASSERT(node);

    remote_rsc = node->details->remote_rsc;
    CRM_ASSERT(remote_rsc);

    // Cluster node currently hosting the connection resource (may be NULL)
    cluster_node = pe__current_node(remote_rsc);

    /* If the cluster node the remote connection resource resides on
     * is unclean or went offline, we can't process any operations
     * on that remote node until after it starts elsewhere.
     */
    if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
        /* The connection resource is not going to run anywhere */

        if (cluster_node && cluster_node->details->unclean) {
            /* The remote connection is failed because its resource is on a
             * failed node and can't be recovered elsewhere, so we must fence.
             */
            return remote_state_failed;
        }

        if (is_not_set(remote_rsc->flags, pe_rsc_failed)) {
            /* Connection resource is cleanly stopped */
            return remote_state_stopped;
        }

        /* Connection resource is failed */

        if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
            && remote_rsc->remote_reconnect_ms
            && node->details->remote_was_fenced
            && !pe__shutdown_requested(node)) {

            /* We won't know whether the connection is recoverable until the
             * reconnect interval expires and we reattempt connection.
             */
            return remote_state_unknown;
        }

        /* The remote connection is in a failed state. If there are any
         * resources known to be active on it (stop) or in an unknown state
         * (probe), we must assume the worst and fence it.
         */
        return remote_state_failed;

    } else if (cluster_node == NULL) {
        /* Connection is recoverable but not currently running anywhere, see if we can recover it first */
        return remote_state_unknown;

    } else if(cluster_node->details->unclean == TRUE
              || cluster_node->details->online == FALSE) {
        /* Connection is running on a dead node, see if we can recover it first */
        return remote_state_resting;

    } else if (pcmk__list_of_multiple(remote_rsc->running_on)
               && remote_rsc->partial_migration_source
               && remote_rsc->partial_migration_target) {
        /* We're in the middle of migrating a connection resource,
         * wait until after the resource migrates before performing
         * any actions.
         */
        return remote_state_resting;

    }
    // Connection is allocated and its current host is healthy
    return remote_state_alive;
}
2095 
/* Order a resource action relative to the connection resource of the
 * Pacemaker Remote node it is assigned to, based on the connection's
 * current state, and fence the node when the action cannot otherwise be
 * carried out safely.
 *
 * Called for remote-node actions whose connection is NOT containerized
 * (the container case is handled by apply_container_ordering()).
 */
static void
apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set)
{
    pe_resource_t *remote_rsc = NULL;
    enum action_tasks task = text2task(action->task);
    enum remote_connection_state state = get_remote_node_state(action->node);

    enum pe_ordering order_opts = pe_order_none;

    if (action->rsc == NULL) {
        return;
    }

    CRM_ASSERT(action->node);
    /* NOTE(review): a line was lost here in extraction — presumably an
     * assertion that the node is a guest or remote node; confirm against
     * upstream before compiling.
     */

    remote_rsc = action->node->details->remote_rsc;
    CRM_ASSERT(remote_rsc);

    crm_trace("Order %s action %s relative to %s%s (state: %s)",
              action->task, action->uuid,
              is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
              remote_rsc->id, state2text(state));

    if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
        || safe_str_eq(action->task, CRMD_ACTION_MIGRATED)) {
        /* Migration ops map to "no_action", but we need to apply the same
         * ordering as for stop or demote (see get_router_node()).
         */
        task = stop_rsc;
    }

    switch (task) {
        case start_rsc:
        case action_promote:
            order_opts = pe_order_none;

            if (state == remote_state_failed) {
                /* Force recovery, by making this action required */
                order_opts |= pe_order_implies_then;
            }

            /* Ensure connection is up before running this action */
            order_start_then_action(remote_rsc, action, order_opts, data_set);
            break;

        case stop_rsc:
            if(state == remote_state_alive) {
                // Healthy connection: stop the resource, then the connection
                order_action_then_stop(action, remote_rsc,
                                       pe_order_implies_first, data_set);

            } else if(state == remote_state_failed) {
                /* The resource is active on the node, but since we don't have a
                 * valid connection, the only way to stop the resource is by
                 * fencing the node. There is no need to order the stop relative
                 * to the remote connection, since the stop will become implied
                 * by the fencing.
                 */
                pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE);

            } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) {
                /* State must be remote_state_unknown or remote_state_stopped.
                 * Since the connection is not coming back up in this
                 * transition, stop this resource first.
                 */
                order_action_then_stop(action, remote_rsc,
                                       pe_order_implies_first, data_set);

            } else {
                /* The connection is going to be started somewhere else, so
                 * stop this resource after that completes.
                 */
                order_start_then_action(remote_rsc, action, pe_order_none, data_set);
            }
            break;

        case action_demote:
            /* Only order this demote relative to the connection start if the
             * connection isn't being torn down. Otherwise, the demote would be
             * blocked because the connection start would not be allowed.
             */
            if(state == remote_state_resting || state == remote_state_unknown) {
                order_start_then_action(remote_rsc, action, pe_order_none,
                                        data_set);
            } /* Otherwise we can rely on the stop ordering */
            break;

        default:
            /* Wait for the connection resource to be up */
            if (is_recurring_action(action)) {
                /* In case we ever get the recovery logic wrong, force
                 * recurring monitors to be restarted, even if just
                 * the connection was re-established
                 */
                order_start_then_action(remote_rsc, action,
                                        pe_order_implies_then, data_set);

            } else {
                pe_node_t *cluster_node = pe__current_node(remote_rsc);

                if(task == monitor_rsc && state == remote_state_failed) {
                    /* We would only be here if we do not know the
                     * state of the resource on the remote node.
                     * Since we have no way to find out, it is
                     * necessary to fence the node.
                     */
                    pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE);
                }

                if(cluster_node && state == remote_state_stopped) {
                    /* The connection is currently up, but is going
                     * down permanently.
                     *
                     * Make sure we check services are actually
                     * stopped _before_ we let the connection get
                     * closed
                     */
                    order_action_then_stop(action, remote_rsc,
                                           pe_order_runnable_left, data_set);

                } else {
                    order_start_then_action(remote_rsc, action, pe_order_none,
                                            data_set);
                }
            }
            break;
    }
}
2228 
/* Walk all actions in the transition and add the ordering constraints needed
 * for actions that occur on (or concern) Pacemaker Remote nodes, dispatching
 * to apply_container_ordering() or apply_remote_ordering() as appropriate.
 */
static void
apply_remote_node_ordering(pe_working_set_t *data_set)
{
    if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
        return;
    }

    for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
        pe_action_t *action = (pe_action_t *) gIter->data;
        pe_resource_t *remote = NULL;

        // We are only interested in resource actions
        if (action->rsc == NULL) {
            continue;
        }

        /* Special case: If we are clearing the failcount of an actual
         * remote connection resource, then make sure this happens before
         * any start of the resource in this transition.
         */
        if (action->rsc->is_remote_node &&
            /* NOTE(review): the second operand of this condition (and its
             * closing "{") was lost in extraction — presumably a comparison
             * of action->task against the clear-failcount operation; restore
             * from upstream before compiling.
             */

            custom_action_order(action->rsc,
                                NULL,
                                action,
                                action->rsc,
                                pcmk__op_key(action->rsc->id, RSC_START, 0),
                                NULL,
                                /* NOTE(review): the ordering-flag argument was
                                 * lost in extraction here — confirm against
                                 * upstream.
                                 */
                                data_set);

            continue;
        }

        // We are only interested in actions allocated to a node
        if (action->node == NULL) {
            continue;
        }

        if (!pe__is_guest_or_remote_node(action->node)) {
            continue;
        }

        /* We are only interested in real actions.
         *
         * @TODO This is probably wrong; pseudo-actions might be converted to
         * real actions and vice versa later in update_actions() at the end of
         * stage7().
         */
        if (is_set(action->flags, pe_action_pseudo)) {
            continue;
        }

        remote = action->node->details->remote_rsc;
        if (remote == NULL) {
            // Orphaned
            continue;
        }

        /* Another special case: if a resource is moving to a Pacemaker Remote
         * node, order the stop on the original node after any start of the
         * remote connection. This ensures that if the connection fails to
         * start, we leave the resource running on the original node.
         */
        if (safe_str_eq(action->task, RSC_START)) {
            for (GList *item = action->rsc->actions; item != NULL;
                 item = item->next) {
                pe_action_t *rsc_action = item->data;

                if ((rsc_action->node->details != action->node->details)
                    && safe_str_eq(rsc_action->task, RSC_STOP)) {
                    custom_action_order(remote, start_key(remote), NULL,
                                        action->rsc, NULL, rsc_action,
                                        pe_order_optional, data_set);
                }
            }
        }

        /* The action occurs across a remote connection, so create
         * ordering constraints that guarantee the action occurs while the node
         * is active (after start, before stop ... things like that).
         *
         * This is somewhat brittle in that we need to make sure the results of
         * this ordering are compatible with the result of get_router_node().
         * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
         * of this logic rather than action2xml().
         */
        if (remote->container) {
            crm_trace("Container ordering for %s", action->uuid);
            apply_container_ordering(action, data_set);

        } else {
            crm_trace("Remote ordering for %s", action->uuid);
            apply_remote_ordering(action, data_set);
        }
    }
}
2327 
2328 static gboolean
2329 order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action)
2330 {
2331  /* No need to probe the resource on the node that is being
2332  * unfenced. Otherwise it might introduce transition loop
2333  * since probe will be performed after the node is
2334  * unfenced.
2335  */
2336  if (safe_str_eq(rh_action->task, CRM_OP_FENCE)
2337  && probe->node && rh_action->node
2338  && probe->node->details == rh_action->node->details) {
2339  const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action");
2340 
2341  if (safe_str_eq(op, "on")) {
2342  return TRUE;
2343  }
2344  }
2345 
2346  // Shutdown waits for probe to complete only if it's on the same node
2347  if ((safe_str_eq(rh_action->task, CRM_OP_SHUTDOWN))
2348  && probe->node && rh_action->node
2349  && probe->node->details != rh_action->node->details) {
2350  return TRUE;
2351  }
2352  return FALSE;
2353 }
2354 
/* For every "stop A, then <something> B" ordering constraint, also order A's
 * probes before that "then" action: since an unexpected probe result stands
 * in for the stop, the dependent action must wait for the probe too.
 */
static void
order_first_probes_imply_stops(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
        pe__ordering_t *order = gIter->data;
        enum pe_ordering order_type = pe_order_optional;

        pe_resource_t *lh_rsc = order->lh_rsc;
        pe_resource_t *rh_rsc = order->rh_rsc;
        pe_action_t *lh_action = order->lh_action;
        pe_action_t *rh_action = order->rh_action;
        const char *lh_action_task = order->lh_action_task;
        const char *rh_action_task = order->rh_action_task;

        GListPtr probes = NULL;
        GListPtr rh_actions = NULL;

        GListPtr pIter = NULL;

        // A "first" resource is required, and ordering a resource with itself is pointless
        if (lh_rsc == NULL) {
            continue;

        } else if (rh_rsc && lh_rsc == rh_rsc) {
            continue;
        }

        if (lh_action == NULL && lh_action_task == NULL) {
            continue;
        }

        if (rh_action == NULL && rh_action_task == NULL) {
            continue;
        }

        /* Technically probe is expected to return "not running", which could be
         * the alternative of stop action if the status of the resource is
         * unknown yet.
         */
        if (lh_action && safe_str_neq(lh_action->task, RSC_STOP)) {
            continue;

        } else if (lh_action == NULL
                   && lh_action_task
                   && !pcmk__ends_with(lh_action_task, "_" RSC_STOP "_0")) {
            continue;
        }

        /* Do not probe the resource inside of a stopping container. Otherwise
         * it might introduce transition loop since probe will be performed
         * after the container starts again.
         */
        if (rh_rsc && lh_rsc->container == rh_rsc) {
            if (rh_action && safe_str_eq(rh_action->task, RSC_STOP)) {
                continue;

            } else if (rh_action == NULL && rh_action_task
                       && pcmk__ends_with(rh_action_task,"_" RSC_STOP "_0")) {
                continue;
            }
        }

        if (order->type == pe_order_none) {
            continue;
        }

        // Preserve the order options for future filtering
        if (is_set(order->type, pe_order_apply_first_non_migratable)) {
            /* NOTE(review): the statement in this branch (presumably setting
             * the corresponding bit in order_type) was lost in extraction —
             * restore from upstream before compiling.
             */
        }

        if (is_set(order->type, pe_order_same_node)) {
            set_bit(order_type, pe_order_same_node);
        }

        // Keep the order types for future filtering
        if (order->type == pe_order_anti_colocation
            || order->type == pe_order_load) {
            order_type = order->type;
        }

        probes = pe__resource_actions(lh_rsc, NULL, RSC_STATUS, FALSE);
        if (probes == NULL) {
            continue;
        }

        if (rh_action) {
            rh_actions = g_list_prepend(rh_actions, rh_action);

        } else if (rh_rsc && rh_action_task) {
            rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL);
        }

        if (rh_actions == NULL) {
            g_list_free(probes);
            continue;
        }

        crm_trace("Processing for LH probe based on ordering constraint %s -> %s"
                  " (id=%d, type=%.6x)",
                  lh_action ? lh_action->uuid : lh_action_task,
                  rh_action ? rh_action->uuid : rh_action_task,
                  order->id, order->type);

        // Order every probe of lh_rsc before every collected "then" action
        for (pIter = probes; pIter != NULL; pIter = pIter->next) {
            pe_action_t *probe = (pe_action_t *) pIter->data;
            GListPtr rIter = NULL;

            for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) {
                pe_action_t *rh_action_iter = (pe_action_t *) rIter->data;

                if (order_first_probe_unneeded(probe, rh_action_iter)) {
                    continue;
                }
                order_actions(probe, rh_action_iter, order_type);
            }
        }

        g_list_free(rh_actions);
        g_list_free(probes);
    }
}
2478 
/* Recursively order a probe before the stop (for a start) or demote (for a
 * promote) of any dependent resource reached through the ordering chain
 * starting at "after", so that a surprising probe result can trigger a
 * restart or re-promote of the dependents.
 */
static void
order_first_probe_then_restart_repromote(pe_action_t * probe,
                                         pe_action_t * after,
                                         pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;
    bool interleave = FALSE;
    pe_resource_t *compatible_rsc = NULL;

    // Only primitive (native) resource probes are of interest
    if (probe == NULL
        || probe->rsc == NULL
        || probe->rsc->variant != pe_native) {
        return;
    }

    if (after == NULL
        // Avoid running into any possible loop
        || is_set(after->flags, pe_action_tracking)) {
        return;
    }

    if (safe_str_neq(probe->task, RSC_STATUS)) {
        return;
    }

    /* NOTE(review): a statement was lost here in extraction — presumably
     * marking "after" with the pe_action_tracking flag so the recursion above
     * can detect loops; restore from upstream before compiling.
     */

    crm_trace("Processing based on %s %s -> %s %s",
              probe->uuid,
              probe->node ? probe->node->details->uname: "",
              after->uuid,
              after->node ? after->node->details->uname : "");

    if (after->rsc
        /* Better not build a dependency directly with a clone/group.
         * We are going to proceed through the ordering chain and build
         * dependencies with its children.
         */
        && after->rsc->variant == pe_native
        && probe->rsc != after->rsc) {

        GListPtr then_actions = NULL;
        enum pe_ordering probe_order_type = pe_order_optional;

        if (safe_str_eq(after->task, RSC_START)) {
            then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP, FALSE);

        } else if (safe_str_eq(after->task, RSC_PROMOTE)) {
            then_actions = pe__resource_actions(after->rsc, NULL, RSC_DEMOTE, FALSE);
        }

        for (gIter = then_actions; gIter != NULL; gIter = gIter->next) {
            pe_action_t *then = (pe_action_t *) gIter->data;

            // Skip any pseudo action which for example is implied by fencing
            if (is_set(then->flags, pe_action_pseudo)) {
                continue;
            }

            order_actions(probe, then, probe_order_type);
        }
        g_list_free(then_actions);
    }

    if (after->rsc
        && after->rsc->variant > pe_group) {
        const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
        /* NOTE(review): the key argument of this lookup (presumably the
         * clone "interleave" meta-attribute name constant) was lost in
         * extraction — restore from upstream before compiling.
         */

        interleave = crm_is_true(interleave_s);

        if (interleave) {
            /* For an interleaved clone, we should build a dependency only
             * with the relevant clone child.
             */
            compatible_rsc = find_compatible_child(probe->rsc,
                                                   after->rsc,
                                                   /* NOTE(review): an argument
                                                    * (presumably a role filter)
                                                    * was lost in extraction here.
                                                    */
                                                   FALSE, data_set);
        }
    }

    for (gIter = after->actions_after; gIter != NULL; gIter = gIter->next) {
        pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) gIter->data;
        /* pe_order_implies_then is the reason why a required A.start
         * implies/enforces B.start to be required too, which is the cause of
         * B.restart/re-promote.
         *
         * Not sure about pe_order_implies_then_on_node though. It's now only
         * used for unfencing case, which tends to introduce transition
         * loops...
         */

        if (is_not_set(after_wrapper->type, pe_order_implies_then)) {
            /* The order type between a group/clone and its child such as
             * B.start-> B_child.start is:
             * pe_order_implies_first_printed | pe_order_runnable_left
             *
             * Proceed through the ordering chain and build dependencies with
             * its children.
             */
            if (after->rsc == NULL
                || after->rsc->variant < pe_group
                || probe->rsc->parent == after->rsc
                || after_wrapper->action->rsc == NULL
                || after_wrapper->action->rsc->variant > pe_group
                || after->rsc != after_wrapper->action->rsc->parent) {
                continue;
            }

            /* Proceed to the children of a group or a non-interleaved clone.
             * For an interleaved clone, proceed only to the relevant child.
             */
            if (after->rsc->variant > pe_group
                && interleave == TRUE
                && (compatible_rsc == NULL
                    || compatible_rsc != after_wrapper->action->rsc)) {
                continue;
            }
        }

        crm_trace("Proceeding through %s %s -> %s %s (type=0x%.6x)",
                  after->uuid,
                  after->node ? after->node->details->uname: "",
                  after_wrapper->action->uuid,
                  after_wrapper->action->node ? after_wrapper->action->node->details->uname : "",
                  after_wrapper->type);

        order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
    }
}
2610 
/* Reset the loop-detection tracking flag on every action, so the next
 * order_first_probe_then_restart_repromote() traversal starts clean.
 */
static void clear_actions_tracking_flag(pe_working_set_t * data_set)
{
    GListPtr gIter = NULL;

    for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
        pe_action_t *action = (pe_action_t *) gIter->data;

        if (is_set(action->flags, pe_action_tracking)) {
            /* NOTE(review): the statement clearing pe_action_tracking from
             * action->flags was lost in extraction — restore from upstream
             * before compiling.
             */
        }
    }
}
2623 
2624 static void
2625 order_first_rsc_probes(pe_resource_t * rsc, pe_working_set_t * data_set)
2626 {
2627  GListPtr gIter = NULL;
2628  GListPtr probes = NULL;
2629 
2630  for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
2631  pe_resource_t * child = (pe_resource_t *) gIter->data;
2632 
2633  order_first_rsc_probes(child, data_set);
2634  }
2635 
2636  if (rsc->variant != pe_native) {
2637  return;
2638  }
2639 
2640  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
2641 
2642  for (gIter = probes; gIter != NULL; gIter= gIter->next) {
2643  pe_action_t *probe = (pe_action_t *) gIter->data;
2644  GListPtr aIter = NULL;
2645 
2646  for (aIter = probe->actions_after; aIter != NULL; aIter = aIter->next) {
2647  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) aIter->data;
2648 
2649  order_first_probe_then_restart_repromote(probe, after_wrapper->action, data_set);
2650  clear_actions_tracking_flag(data_set);
2651  }
2652  }
2653 
2654  g_list_free(probes);
2655 }
2656 
2657 static void
2658 order_first_probes(pe_working_set_t * data_set)
2659 {
2660  GListPtr gIter = NULL;
2661 
2662  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
2663  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
2664 
2665  order_first_rsc_probes(rsc, data_set);
2666  }
2667 
2668  order_first_probes_imply_stops(data_set);
2669 }
2670 
/* Placeholder for orderings in which a probe comes second ("A then B" would
 * ideally delay B's probe until A has started). The entire implementation is
 * compiled out; see the comment inside for why it remains disabled.
 */
static void
order_then_probes(pe_working_set_t * data_set)
{
#if 0
    GListPtr gIter = NULL;

    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        pe_resource_t *rsc = (pe_resource_t *) gIter->data;

        /* Given "A then B", we would prefer to wait for A to be
         * started before probing B.
         *
         * If A was a filesystem on which the binaries and data for B
         * lived, it would have been useful if the author of B's agent
         * could assume that A is running before B.monitor will be
         * called.
         *
         * However we can't _only_ probe once A is running, otherwise
         * we'd not detect the state of B if A could not be started
         * for some reason.
         *
         * In practice however, we cannot even do an opportunistic
         * version of this because B may be moving:
         *
         * B.probe -> B.start
         * B.probe -> B.stop
         * B.stop -> B.start
         * A.stop -> A.start
         * A.start -> B.probe
         *
         * So far so good, but if we add the result of this code:
         *
         * B.stop -> A.stop
         *
         * Then we get a loop:
         *
         * B.probe -> B.stop -> A.stop -> A.start -> B.probe
         *
         * We could kill the 'B.probe -> B.stop' dependency, but that
         * could mean stopping B "too" soon, because B.start must wait
         * for the probes to complete.
         *
         * Another option is to allow it only if A is a non-unique
         * clone with clone-max == node-max (since we'll never be
         * moving it). However, we could still be stopping one
         * instance at the same time as starting another.

         * The complexity of checking for allowed conditions combined
         * with the ever narrowing usecase suggests that this code
         * should remain disabled until someone gets smarter.
         */
        pe_action_t *start = NULL;
        GListPtr actions = NULL;
        GListPtr probes = NULL;

        actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);

        if (actions) {
            start = actions->data;
            g_list_free(actions);
        }

        if(start == NULL) {
            crm_err("No start action for %s", rsc->id);
            continue;
        }

        probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);

        for (actions = start->actions_before; actions != NULL; actions = actions->next) {
            pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;

            GListPtr pIter = NULL;
            pe_action_t *first = before->action;
            pe_resource_t *first_rsc = first->rsc;

            if(first->required_runnable_before) {
                GListPtr clone_actions = NULL;
                for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
                    before = (pe_action_wrapper_t *) clone_actions->data;

                    crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);

                    CRM_ASSERT(before->action->rsc);
                    first_rsc = before->action->rsc;
                    break;
                }

            } else if(safe_str_neq(first->task, RSC_START)) {
                crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
            }

            if(first_rsc == NULL) {
                continue;

            } else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
                crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
                continue;

            } else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
                crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
                continue;
            }

            crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);

            for (pIter = probes; pIter != NULL; pIter = pIter->next) {
                pe_action_t *probe = (pe_action_t *) pIter->data;

                crm_err("Ordering %s before %s", first->uuid, probe->uuid);
                order_actions(first, probe, pe_order_optional);
            }
        }
    }
#endif
}
2787 
2788 static void
2789 order_probes(pe_working_set_t * data_set)
2790 {
2791  order_first_probes(data_set);
2792  order_then_probes(data_set);
2793 }
2794 
/* Scheduler stage: apply all ordering constraints, order probes, update every
 * action's flags accordingly, prune invalid orderings, and log the resulting
 * planned actions. Always returns TRUE.
 */
gboolean
/* NOTE(review): the signature line was lost in extraction — from the body's
 * use of data_set this is presumably stage7(pe_working_set_t *data_set);
 * confirm against upstream before compiling.
 */
{
    GList *gIter = NULL;

    crm_trace("Applying ordering constraints");

    /* Don't ask me why, but apparently they need to be processed in
     * the order they were created in... go figure
     *
     * Also g_list_append() has horrendous performance characteristics
     * So we need to use g_list_prepend() and then reverse the list here
     */
    data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);

    for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
        pe__ordering_t *order = gIter->data;
        pe_resource_t *rsc = order->lh_rsc;

        crm_trace("Applying ordering constraint: %d", order->id);

        // Dispatch on which side(s) of the constraint name a resource
        if (rsc != NULL) {
            crm_trace("rsc_action-to-*");
            rsc_order_first(rsc, order, data_set);
            continue;
        }

        rsc = order->rh_rsc;
        if (rsc != NULL) {
            crm_trace("action-to-rsc_action");
            rsc_order_then(order->lh_action, rsc, order);

        } else {
            crm_trace("action-to-action");
            order_actions(order->lh_action, order->rh_action, order->type);
        }
    }

    for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
        pe_action_t *action = (pe_action_t *) gIter->data;

        update_colo_start_chain(action, data_set);
    }

    crm_trace("Ordering probes");
    order_probes(data_set);

    crm_trace("Updating %d actions", g_list_length(data_set->actions));
    for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
        pe_action_t *action = (pe_action_t *) gIter->data;

        update_action(action, data_set);
    }

    // Check for invalid orderings
    for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
        pe_action_t *action = (pe_action_t *) gIter->data;
        pe_action_wrapper_t *input = NULL;

        for (GList *input_iter = action->actions_before;
             input_iter != NULL; input_iter = input_iter->next) {

            input = (pe_action_wrapper_t *) input_iter->data;
            if (pcmk__ordering_is_invalid(action, input)) {
                input->type = pe_order_none;
            }
        }
    }

    // Log the planned actions (not to the terminal)
    LogNodeActions(data_set, FALSE);
    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        pe_resource_t *rsc = (pe_resource_t *) gIter->data;

        LogActions(rsc, data_set, FALSE);
    }
    return TRUE;
}
2872 
// Sequence number of the current transition graph; -1 until the first one is created
static int transition_id = -1;
2874 
/* Log a one-line summary of the just-calculated transition, at a severity
 * matching whether errors or warnings occurred during processing, plus a
 * hint to run crm_verify if configuration errors were found.
 *
 * \param filename  Path where the transition inputs were saved (logged only)
 */
void
pcmk__log_transition_summary(const char *filename)
{
    if (was_processing_error) {
        crm_err("Calculated transition %d (with errors), saving inputs in %s",
                transition_id, filename);

    } else if (was_processing_warning) {
        crm_warn("Calculated transition %d (with warnings), saving inputs in %s",
                 transition_id, filename);

    } else {
        crm_notice("Calculated transition %d, saving inputs in %s",
                   transition_id, filename);
    }
    if (crm_config_error) {
        crm_notice("Configuration errors found during scheduler processing,"
                   "  please run \"crm_verify -L\" to identify issues");
    }
}
2901 
2902 /*
2903  * Create a dependency graph to send to the transitioner (via the controller)
2904  */
/* Scheduler stage: build the transition graph XML (data_set->graph) to send
 * to the controller — global graph attributes first, then per-resource
 * actions, then the maintenance-state update and remaining non-resource
 * actions. Always returns TRUE.
 */
gboolean
/* NOTE(review): the signature line was lost in extraction — from the body's
 * use of data_set this is presumably stage8(pe_working_set_t *data_set);
 * confirm against upstream before compiling.
 */
{
    GListPtr gIter = NULL;
    const char *value = NULL;

    transition_id++;
    crm_trace("Creating transition graph %d.", transition_id);

    data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);

    // Copy relevant cluster options into the graph as attributes
    value = pe_pref(data_set->config_hash, "cluster-delay");
    crm_xml_add(data_set->graph, "cluster-delay", value);

    value = pe_pref(data_set->config_hash, "stonith-timeout");
    crm_xml_add(data_set->graph, "stonith-timeout", value);

    crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");

    if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
        crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
    } else {
        crm_xml_add(data_set->graph, "failed-start-offset", "1");
    }

    value = pe_pref(data_set->config_hash, "batch-limit");
    crm_xml_add(data_set->graph, "batch-limit", value);

    crm_xml_add_int(data_set->graph, "transition_id", transition_id);

    value = pe_pref(data_set->config_hash, "migration-limit");
    if (crm_parse_ll(value, NULL) > 0) {
        crm_xml_add(data_set->graph, "migration-limit", value);
    }

    if (data_set->recheck_by > 0) {
        char *recheck_epoch = NULL;

        recheck_epoch = crm_strdup_printf("%llu",
                                          (long long) data_set->recheck_by);
        crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
        free(recheck_epoch);
    }

/* errors...
   slist_iter(action, pe_action_t, action_list, lpc,
   if(action->optional == FALSE && action->runnable == FALSE) {
   print_action("Ignoring", action, TRUE);
   }
   );
*/

    /* The following code will de-duplicate action inputs, so nothing past this
     * should rely on the action input type flags retaining their original
     * values.
     */

    gIter = data_set->resources;
    for (; gIter != NULL; gIter = gIter->next) {
        pe_resource_t *rsc = (pe_resource_t *) gIter->data;

        pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
        rsc->cmds->expand(rsc, data_set);
    }

    crm_log_xml_trace(data_set->graph, "created resource-driven action list");

    /* pseudo action to distribute list of nodes with maintenance state update */
    add_maintenance_update(data_set);

    /* catch any non-resource specific actions */
    crm_trace("processing non-resource actions");

    gIter = data_set->actions;
    for (; gIter != NULL; gIter = gIter->next) {
        pe_action_t *action = (pe_action_t *) gIter->data;

        if (action->rsc
            && action->node
            && action->node->details->shutdown
            && is_not_set(action->rsc->flags, pe_rsc_maintenance)
            && is_not_set(action->flags, pe_action_optional)
            && is_not_set(action->flags, pe_action_runnable)
            && crm_str_eq(action->task, RSC_STOP, TRUE)
            ) {
            /* Eventually we should just ignore the 'fence' case
             * But for now it's the best way to detect (in CTS) when
             * CIB resource updates are being lost
             */
            if (is_set(data_set->flags, pe_flag_have_quorum)
                || data_set->no_quorum_policy == no_quorum_ignore) {
                crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
                         action->node->details->unclean ? "fence" : "shut down",
                         action->node->details->uname, action->rsc->id,
                         is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
                         is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "",
                         action->uuid);
            }
        }

        graph_element_from_action(action, data_set);
    }

    crm_log_xml_trace(data_set->graph, "created generic action list");
    crm_trace("Created transition graph %d.", transition_id);

    return TRUE;
}
3013 
3014 void
3015 LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
3016 {
3017  GListPtr gIter = NULL;
3018 
3019  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
3020  char *node_name = NULL;
3021  char *task = NULL;
3022  pe_action_t *action = (pe_action_t *) gIter->data;
3023 
3024  if (action->rsc != NULL) {
3025  continue;
3026  } else if (is_set(action->flags, pe_action_optional)) {
3027  continue;
3028  }
3029 
3030  if (pe__is_guest_node(action->node)) {
3031  node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
3032  } else if(action->node) {
3033  node_name = crm_strdup_printf("%s", action->node->details->uname);
3034  }
3035 
3036 
3037  if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
3038  task = strdup("Shutdown");
3039  } else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
3040  const char *op = g_hash_table_lookup(action->meta, "stonith_action");
3041  task = crm_strdup_printf("Fence (%s)", op);
3042  }
3043 
3044  if(task == NULL) {
3045  /* Nothing to report */
3046  } else if(terminal && action->reason) {
3047  printf(" * %s %s '%s'\n", task, node_name, action->reason);
3048  } else if(terminal) {
3049  printf(" * %s %s\n", task, node_name);
3050  } else if(action->reason) {
3051  crm_notice(" * %s %s '%s'\n", task, node_name, action->reason);
3052  } else {
3053  crm_notice(" * %s %s\n", task, node_name);
3054  }
3055 
3056  free(node_name);
3057  free(task);
3058  }
3059 }
pe_action_t * pe_cancel_op(pe_resource_t *rsc, const char *name, guint interval_ms, pe_node_t *node, pe_working_set_t *data_set)
#define LOG_TRACE
Definition: logging.h:36
void pe__foreach_param_check(pe_working_set_t *data_set, void(*cb)(pe_resource_t *, pe_node_t *, xmlNode *, enum pe_check_parameters, pe_working_set_t *))
Definition: remote.c:248
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:233
GListPtr nodes
Definition: pe_types.h:146
xmlNode * find_xml_node(xmlNode *cib, const char *node_path, gboolean must_find)
Definition: xml.c:1764
void group_append_meta(pe_resource_t *rsc, xmlNode *xml)
enum pe_action_flags clone_action_flags(pe_action_t *action, pe_node_t *node)
pe_action_t * lh_action
Definition: internal.h:68
enum pe_quorum_policy no_quorum_policy
Definition: pe_types.h:138
#define RSC_STOP
Definition: crm.h:199
void clone_append_meta(pe_resource_t *rsc, xmlNode *xml)
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
A dumping ground.
gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
Definition: operations.c:48
#define crm_notice(fmt, args...)
Definition: logging.h:365
#define CRMD_ACTION_MIGRATED
Definition: crm.h:169
#define pe_flag_stop_action_orphans
Definition: pe_types.h:101
GHashTable * attrs
Definition: pe_types.h:221
gboolean(* create_probe)(pe_resource_t *, pe_node_t *, pe_action_t *, gboolean, pe_working_set_t *)
enum pe_action_flags pcmk__bundle_action_flags(pe_action_t *action, pe_node_t *node)
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:18
#define XML_CONFIG_ATTR_SHUTDOWN_LOCK
Definition: msg_xml.h:349
#define crm_crit(fmt, args...)
Definition: logging.h:362
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:263
#define INFINITY
Definition: crm.h:95
gboolean stage2(pe_working_set_t *data_set)
gboolean stage5(pe_working_set_t *data_set)
#define CRM_OP_FENCE
Definition: crm.h:141
gboolean stage3(pe_working_set_t *data_set)
#define XML_ATTR_TRANSITION_MAGIC
Definition: msg_xml.h:360
enum rsc_role_e(* state)(const pe_resource_t *, gboolean)
Definition: pe_types.h:52
GList * sort_nodes_by_weight(GList *nodes, pe_node_t *active_node, pe_working_set_t *data_set)
pe_check_parameters
Definition: pe_types.h:182
#define XML_TAG_GRAPH
Definition: msg_xml.h:288
#define stop_action(rsc, node, optional)
Definition: internal.h:297
void complex_set_cmds(pe_resource_t *rsc)
void pe__add_param_check(xmlNode *rsc_op, pe_resource_t *rsc, pe_node_t *node, enum pe_check_parameters, pe_working_set_t *data_set)
Definition: remote.c:222
pe_resource_t * container
Definition: pe_types.h:364
pe_node_t * partial_migration_source
Definition: pe_types.h:349
#define pe_flag_concurrent_fencing
Definition: pe_types.h:98
#define XML_ATTR_TYPE
Definition: msg_xml.h:99
void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set)
Definition: utils.c:2681
void pcmk__log_transition_summary(const char *filename)
#define XML_CIB_TAG_CONSTRAINTS
Definition: msg_xml.h:143
#define CRM_OP_REPROBE
Definition: crm.h:150
resource_alloc_functions_t * cmds
Definition: pe_types.h:317
#define pe_flag_symmetric_cluster
Definition: pe_types.h:92
bool pcmk__ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input)
xmlNode * get_object_root(const char *object_type, xmlNode *the_root)
Definition: cib_utils.c:144
void clone_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
#define pe_flag_no_compat
Definition: pe_types.h:123
gboolean stage0(pe_working_set_t *data_set)
int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val, guint *result)
Definition: strings.c:162
xmlNode * xml
Definition: pe_types.h:307
pe_resource_t * rsc
Definition: pe_types.h:388
enum rsc_role_e next_role
Definition: pe_types.h:355
#define pe__show_node_weights(level, rsc, text, nodes)
Definition: internal.h:273
void add_maintenance_update(pe_working_set_t *data_set)
const char * crm_xml_add_int(xmlNode *node, const char *name, int value)
Create an XML attribute with specified name and integer value.
Definition: nvpair.c:424
gboolean exclusive_discover
Definition: pe_types.h:336
int char2score(const char *score)
Definition: utils.c:59
pe_resource_t * remote_rsc
Definition: pe_types.h:217
pe_action_t * sched_shutdown_op(pe_node_t *node, pe_working_set_t *data_set)
resource_alloc_functions_t resource_class_alloc_functions[]
GHashTable * meta
Definition: pe_types.h:357
#define pe_rsc_unique
Definition: pe_types.h:241
resource_object_functions_t * fns
Definition: pe_types.h:316
gboolean pe__is_guest_or_remote_node(pe_node_t *node)
Definition: remote.c:58
#define XML_LRM_TAG_RESOURCE
Definition: msg_xml.h:227
void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite)
Definition: utils.c:2574
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: nvpair.c:316
remote_connection_state
void ReloadRsc(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set)
int crm_parse_int(const char *text, const char *default_text)
Parse an integer value from a string.
Definition: strings.c:126
void pcmk__bundle_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
GListPtr resources
Definition: pe_types.h:147
gint sort_op_by_callid(gconstpointer a, gconstpointer b)
Definition: utils.c:1653
gboolean unpack_constraints(xmlNode *xml_constraints, pe_working_set_t *data_set)
#define pe_flag_have_status
Definition: pe_types.h:110
void group_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
enum action_tasks text2task(const char *task)
Definition: common.c:358
time_t get_effective_time(pe_working_set_t *data_set)
Definition: utils.c:1770
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:315
pe_action_t * rh_action
Definition: internal.h:73
void resource_location(pe_resource_t *rsc, pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1609
xmlNode * params_restart
Definition: internal.h:399
enum pe_graph_flags native_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
void native_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
#define clear_bit(word, bit)
Definition: crm_internal.h:69
#define XML_CIB_TAG_LRM
Definition: msg_xml.h:225
void native_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
pe_resource_t * rsc_lh
Definition: internal.h:56
pe_node_t * partial_migration_target
Definition: pe_types.h:348
#define RSC_START
Definition: crm.h:196
int migration_threshold
Definition: pe_types.h:328
pe_node_t * allocated_to
Definition: pe_types.h:347
pe_action_t * action
Definition: pe_types.h:507
GHashTable * pcmk__native_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags)
gboolean remote_was_fenced
Definition: pe_types.h:212
gboolean can_run_resources(const pe_node_t *node)
#define pe_flag_have_quorum
Definition: pe_types.h:91
bool pe__bundle_needs_remote_name(pe_resource_t *rsc)
Definition: bundle.c:956
#define CRM_SCORE_INFINITY
Definition: crm.h:81
gboolean remote_requires_reset
Definition: pe_types.h:211
GListPtr actions_before
Definition: pe_types.h:425
char * reason
Definition: pe_types.h:395
pe_node_t * pe_find_node_id(GListPtr node_list, const char *id)
Definition: status.c:411
const char * action
Definition: pcmk_fence.c:29
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:2504
pe_node_t * lock_node
Definition: pe_types.h:368
gboolean pe__is_remote_node(pe_node_t *node)
Definition: remote.c:36
#define CRM_TRACE_INIT_DATA(name)
Definition: logging.h:134
#define CRMD_ACTION_START
Definition: crm.h:171
void group_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
gboolean is_dc
Definition: pe_types.h:208
void native_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
GListPtr placement_constraints
Definition: pe_types.h:148
#define XML_LRM_ATTR_TASK
Definition: msg_xml.h:260
pe__location_t * rsc2node_new(const char *id, pe_resource_t *rsc, int weight, const char *discovery_mode, pe_node_t *node, pe_working_set_t *data_set)
void native_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
#define CRMD_ACTION_STOP
Definition: crm.h:174
#define CRM_OP_CLEAR_FAILCOUNT
Definition: crm.h:151
#define pe_warn(fmt...)
Definition: internal.h:22
int weight
Definition: pe_types.h:228
gboolean crm_config_error
Definition: utils.c:50
#define pe_flag_have_remote_nodes
Definition: pe_types.h:111
#define crm_warn(fmt, args...)
Definition: logging.h:364
guint remote_reconnect_ms
Definition: pe_types.h:329
void LogActions(pe_resource_t *rsc, pe_working_set_t *data_set, gboolean terminal)
void native_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
#define set_bit(word, bit)
Definition: crm_internal.h:68
void(* create_actions)(pe_resource_t *, pe_working_set_t *)
void clone_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
pe_action_flags
Definition: pe_types.h:276
int rc
Definition: pcmk_fence.c:34
int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest)
Retrieve the millisecond value of an XML attribute.
Definition: nvpair.c:614
#define pe_rsc_failed
Definition: pe_types.h:252
#define crm_debug(fmt, args...)
Definition: logging.h:368
gboolean stage8(pe_working_set_t *data_set)
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:762
#define pe_flag_sanitized
Definition: pe_types.h:114
gboolean pe__resource_is_remote_conn(pe_resource_t *rsc, pe_working_set_t *data_set)
Definition: remote.c:17
pe_node_t * pcmk__clone_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define XML_CIB_ATTR_SHUTDOWN
Definition: msg_xml.h:246
#define XML_ATTR_ID
Definition: msg_xml.h:96
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:522
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:107
#define XML_CIB_TAG_STATE
Definition: msg_xml.h:158
#define stop_key(rsc)
Definition: internal.h:296
enum pe_graph_flags group_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
#define pe_rsc_start_pending
Definition: pe_types.h:254
char * task
Definition: pe_types.h:392
gboolean stage4(pe_working_set_t *data_set)
void group_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
gboolean update_action(pe_action_t *action, pe_working_set_t *data_set)
int custom_action_order(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set)
#define crm_trace(fmt, args...)
Definition: logging.h:369
enum rsc_digest_cmp_val rc
Definition: internal.h:396
void pe_fence_node(pe_working_set_t *data_set, pe_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:85
char * digest_secure_calc
Definition: internal.h:401
void calculate_active_ops(GList *sorted_op_list, int *start_index, int *stop_index)
Definition: unpack.c:2140
GHashTable * meta
Definition: pe_types.h:402
GListPtr find_actions(GListPtr input, const char *key, const pe_node_t *on_node)
Definition: utils.c:1472
struct pe_node_shared_s * details
Definition: pe_types.h:231
GListPtr running_on
Definition: pe_types.h:350
pe_node_t * node
Definition: pe_types.h:389
bool pe__shutdown_requested(pe_node_t *node)
Definition: utils.c:2666
#define XML_AGENT_ATTR_PROVIDER
Definition: msg_xml.h:230
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1823
unsigned long long flags
Definition: pe_types.h:332
const char * uname
Definition: pe_types.h:196
GHashTable * pcmk__group_merge_weights(pe_resource_t *rsc, const char *rhs, GHashTable *nodes, const char *attr, float factor, uint32_t flags)
GListPtr actions
Definition: pe_types.h:153
Wrappers for and extensions to libxml2.
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:363
GHashTable * config_hash
Definition: pe_types.h:140
char * clone_name
Definition: pe_types.h:306
xmlNode * find_rsc_op_entry(pe_resource_t *rsc, const char *key)
Definition: utils.c:1304
void(* expand)(pe_resource_t *, pe_working_set_t *)
pe_resource_t * lh_rsc
Definition: internal.h:67
void LogNodeActions(pe_working_set_t *data_set, gboolean terminal)
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:1976
time_t lock_time
Definition: pe_types.h:369
time_t recheck_by
Definition: pe_types.h:176
#define pe_flag_stonith_enabled
Definition: pe_types.h:95
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:629
gboolean native_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
enum pe_ordering type
Definition: internal.h:64
char * uuid
Definition: pe_types.h:393
#define XML_LRM_ATTR_RESTART_DIGEST
Definition: msg_xml.h:276
gboolean update_action_flags(pe_action_t *action, enum pe_action_flags flags, const char *source, int line)
void group_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
pe_node_t * pcmk__bundle_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
enum pe_obj_types variant
Definition: pe_types.h:314
gboolean xml_has_children(const xmlNode *root)
Definition: xml.c:3331
xmlNode * input
Definition: pe_types.h:126
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:326
const char * placement_strategy
Definition: pe_types.h:133
int rsc_discover_mode
Definition: pe_types.h:232
xmlNode * params_all
Definition: internal.h:397
GListPtr actions
Definition: pe_types.h:343
#define CRM_OP_SHUTDOWN
Definition: crm.h:140
void(* rsc_location)(pe_resource_t *, pe__location_t *)
void pe__free_param_checks(pe_working_set_t *data_set)
Definition: remote.c:263
const char * id
Definition: pe_types.h:195
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: operations.c:40
gboolean pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
guint shutdown_lock
Definition: pe_types.h:178
GListPtr ordering_constraints
Definition: pe_types.h:149
bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node)
Definition: utils.c:87
int pcmk__score_green
Definition: utils.c:55
pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set)
#define PCMK_RESOURCE_CLASS_STONITH
Definition: services.h:49
gboolean rsc_discovery_enabled
Definition: pe_types.h:210
#define XML_LRM_ATTR_SECURE_DIGEST
Definition: msg_xml.h:277
void group_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean show_utilization
void dump_node_capacity(int level, const char *comment, pe_node_t *node)
Definition: utils.c:395
enum pe_action_flags group_action_flags(pe_action_t *action, pe_node_t *node)
void update_colo_start_chain(pe_action_t *action, pe_working_set_t *data_set)
gboolean stage7(pe_working_set_t *data_set)
long long crm_parse_ll(const char *text, const char *default_text)
Parse a long long integer value from a string.
Definition: strings.c:100
gboolean pe__is_guest_node(pe_node_t *node)
Definition: remote.c:47
Cluster status and scheduling.
gboolean is_remote_node
Definition: pe_types.h:335
GListPtr children
Definition: pe_types.h:361
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:573
#define pe_set_action_bit(action, bit)
Definition: internal.h:25
void pcmk__bundle_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
int pcmk__score_red
Definition: utils.c:54
void clone_rsc_location(pe_resource_t *rsc, pe__location_t *constraint)
void clone_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
#define XML_LRM_TAG_RESOURCES
Definition: msg_xml.h:226
int pe__add_scores(int score1, int score2)
Definition: common.c:510
#define crm_err(fmt, args...)
Definition: logging.h:363
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:410
#define CRM_ASSERT(expr)
Definition: results.h:42
#define RSC_STATUS
Definition: crm.h:210
pe_action_t * pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_working_set_t *data_set)
Schedule a controller operation to clear a fail count.
Definition: failcounts.c:360
Cluster Configuration.
op_digest_cache_t * rsc_action_digest_cmp(pe_resource_t *rsc, xmlNode *xml_op, pe_node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2095
#define RSC_PROMOTE
Definition: crm.h:202
gboolean cluster_status(pe_working_set_t *data_set)
Definition: status.c:70
#define pe_clear_action_bit(action, bit)
Definition: internal.h:26
int pcmk__score_yellow
Definition: utils.c:56
GListPtr actions_after
Definition: pe_types.h:426
#define XML_LRM_ATTR_INTERVAL_MS
Definition: msg_xml.h:258
gboolean stage6(pe_working_set_t *data_set)
#define crm_log_xml_info(xml, text)
Definition: logging.h:375
#define DIMOF(a)
Definition: crm.h:57
void clone_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
#define XML_LRM_ATTR_CALLID
Definition: msg_xml.h:272
#define CRMD_ACTION_MIGRATE
Definition: crm.h:168
void(* internal_constraints)(pe_resource_t *, pe_working_set_t *)
gboolean shutdown
Definition: pe_types.h:206
char data[0]
Definition: internal.h:90
#define crm_str(x)
Definition: logging.h:389
void pcmk__bundle_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean clone_create_probe(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete, gboolean force, pe_working_set_t *data_set)
void native_append_meta(pe_resource_t *rsc, xmlNode *xml)
#define pe_flag_stdout
Definition: pe_types.h:115
enum pe_action_flags flags
Definition: pe_types.h:397
gboolean maintenance
Definition: pe_types.h:209
#define CRM_OP_PROBED
Definition: crm.h:149
#define pe_rsc_maintenance
Definition: pe_types.h:261
#define pe_rsc_failure_ignored
Definition: pe_types.h:260
pe_resource_t * rh_rsc
Definition: internal.h:72
#define XML_CIB_TAG_STATUS
Definition: msg_xml.h:139
#define crm_log_xml_trace(xml, text)
Definition: logging.h:377
gboolean crm_is_true(const char *s)
Definition: strings.c:278
#define XML_LRM_TAG_RSC_OP
Definition: msg_xml.h:228
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:19
#define start_key(rsc)
Definition: internal.h:302
pe_action_t * find_first_action(GListPtr input, const char *uuid, const char *task, pe_node_t *on_node)
Definition: utils.c:1442
#define ID(x)
Definition: msg_xml.h:418
unsigned long long flags
Definition: pe_types.h:135
#define pe_err(fmt...)
Definition: internal.h:21
bool pcmk__starts_with(const char *str, const char *prefix)
Check whether a string starts with a certain sequence.
Definition: strings.c:358
gboolean was_processing_error
Definition: common.c:20
int stickiness
Definition: pe_types.h:325
#define safe_str_eq(a, b)
Definition: util.h:65
#define XML_RSC_ATTR_INTERLEAVE
Definition: msg_xml.h:185
#define PCMK__OP_FMT
Definition: internal.h:147
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: utils.c:1561
pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set)
Definition: utils.c:2741
gboolean was_processing_warning
Definition: common.c:21
void clone_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
gboolean probe_resources(pe_working_set_t *data_set)
enum pe_ordering type
Definition: pe_types.h:505
gboolean unclean
Definition: pe_types.h:204
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
#define LOG_STDOUT
Definition: logging.h:41
GList * GListPtr
Definition: crm.h:214
#define pe_flag_start_failure_fatal
Definition: pe_types.h:104
enum node_type type
Definition: pe_types.h:197
gboolean DeleteRsc(pe_resource_t *rsc, pe_node_t *node, gboolean optional, pe_working_set_t *data_set)
void group_expand(pe_resource_t *rsc, pe_working_set_t *data_set)
void pcmk__bundle_create_actions(pe_resource_t *rsc, pe_working_set_t *data_set)
void graph_element_from_action(pe_action_t *action, pe_working_set_t *data_set)
#define crm_info(fmt, args...)
Definition: logging.h:366
enum pe_action_flags native_action_flags(pe_action_t *action, pe_node_t *node)
#define pe_rsc_managed
Definition: pe_types.h:236
#define pe_rsc_orphan
Definition: pe_types.h:235
enum pe_graph_flags pcmk__multi_update_actions(pe_action_t *first, pe_action_t *then, pe_node_t *node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type, pe_working_set_t *data_set)
void pcmk__bundle_internal_constraints(pe_resource_t *rsc, pe_working_set_t *data_set)
void set_alloc_actions(pe_working_set_t *data_set)
void pcmk__bundle_append_meta(pe_resource_t *rsc, xmlNode *xml)
pe_ordering
Definition: pe_types.h:461
gboolean online
Definition: pe_types.h:200
uint64_t flags
Definition: remote.c:149
int pe_get_failcount(pe_node_t *node, pe_resource_t *rsc, time_t *last_failure, uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
Definition: failcounts.c:251
action_tasks
Definition: common.h:47
pe_resource_t * parent
Definition: pe_types.h:312
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: utils.c:2398
#define pe_flag_shutdown_lock
Definition: pe_types.h:107
enum crm_ais_msg_types type
Definition: internal.h:83
#define RSC_DEMOTE
Definition: crm.h:204
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:17
void pcmk__bundle_rsc_colocation_lh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
void native_rsc_colocation_rh(pe_resource_t *lh_rsc, pe_resource_t *rh_rsc, rsc_colocation_t *constraint, pe_working_set_t *data_set)
pe_node_t * pcmk__native_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define XML_AGENT_ATTR_CLASS
Definition: msg_xml.h:229
xmlNode * graph
Definition: pe_types.h:165
char * id
Definition: pe_types.h:305
GHashTable * allowed_nodes
Definition: pe_types.h:352
pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Definition: utils.c:485
#define RSC_MIGRATED
Definition: crm.h:194
#define pe_flag_startup_probes
Definition: pe_types.h:109
pe_node_t * pcmk__group_allocate(pe_resource_t *rsc, pe_node_t *preferred, pe_working_set_t *data_set)
#define pe_flag_stop_rsc_orphans
Definition: pe_types.h:100