pacemaker  2.1.7-0f7f88312f
Scalable High-Availability cluster resource manager
pcmk_sched_probes.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
/*!
 * \internal
 * \brief Choose a probe's expected result from the resource's believed state
 *
 * Looks up \p node (by node ID) in \p rsc's running_on list, then branches on
 * whether the resource is believed inactive on that node or is in the promoted
 * role. If the resource is believed active there in any other role, neither
 * branch is taken.
 *
 * \param[in,out] probe  Probe action being set up
 * \param[in]     rsc    Resource being probed
 * \param[in]     node   Node the probe is for
 *
 * \note NOTE(review): the statement inside each branch is absent from this
 *       listing (the embedded numbering skips 36 and 39). Presumably each one
 *       calls pe__add_action_expected_result() on \p probe -- confirm against
 *       the upstream source before relying on this description.
 */
27 static void
28 add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
29  const pcmk_node_t *node)
30 {
31  // Check whether resource is currently active on node
32  pcmk_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
33 
34  // The expected result is what we think the resource's current state is
35  if (running == NULL) {
37 
38  } else if (rsc->role == pcmk_role_promoted) {
40  }
41 }
42 
/*!
 * \internal
 * \brief Ask every resource in a list to create its probe on a node
 *
 * Calls each listed resource's create_probe() assignment method with the
 * given node. Every resource is visited even after one has reported success.
 *
 * \return true if create_probe() returned true for at least one resource,
 *         otherwise false (including when the list is empty)
 *
 * \note NOTE(review): the line holding the function name and parameter list
 *       (embedded number 53) is missing from this listing. The symbol index
 *       at the end of the listing gives the prototype as
 *       "bool pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)",
 *       which matches the names used in the body.
 */
52 bool
54 {
55  bool any_created = false;
56 
57  for (GList *iter = rscs; iter != NULL; iter = iter->next) {
58  pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
59 
60  if (rsc->cmds->create_probe(rsc, node)) {
61  any_created = true;
62  }
63  }
64  return any_created;
65 }
66 
/*!
 * \internal
 * \brief Conditionally relate a probe of one resource to another resource
 *
 * Acts only when \p rsc1 has been assigned to a node and rsc1's known_on
 * table has no entry for that node's ID (that is, rsc1's state on its
 * assigned node has not been recorded yet).
 *
 * \param[in,out] rsc1  Resource whose assignment and known_on table are checked
 * \param[in,out] rsc2  Second resource (not referenced in the visible lines)
 *
 * \note NOTE(review): most of the statement inside the "if" is absent from
 *       this listing (embedded numbers 81, 82, 84 and 86 are skipped); only
 *       two NULL arguments survive. Judging by the function name it
 *       presumably creates a "probe rsc1, then start rsc2" ordering via
 *       pcmk__new_ordering() -- confirm against the upstream source.
 */
74 static void
75 probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
76 {
77  if ((rsc1->allocated_to != NULL)
78  && (g_hash_table_lookup(rsc1->known_on,
79  rsc1->allocated_to->details->id) == NULL)) {
80 
83  NULL,
85  NULL,
87  }
88 }
89 
98 static bool
99 guest_resource_will_stop(const pcmk_node_t *node)
100 {
101  const pcmk_resource_t *guest_rsc = node->details->remote_rsc->container;
102 
103  /* Ideally, we'd check whether the guest has a required stop, but that
104  * information doesn't exist yet, so approximate it ...
105  */
106  return node->details->remote_requires_reset
107  || node->details->unclean
108  || pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
109  || (guest_rsc->next_role == pcmk_role_stopped)
110 
111  // Guest is moving
112  || ((guest_rsc->role > pcmk_role_stopped)
113  && (guest_rsc->allocated_to != NULL)
114  && (pe_find_node(guest_rsc->running_on,
115  guest_rsc->allocated_to->details->uname) == NULL));
116 }
117 
/*!
 * \internal
 * \brief Create the probe action for a resource on a node
 *
 * Builds the operation key for a monitor with interval 0, logs the probe at
 * debug level, creates (or updates) the action with custom_action(), orders
 * it relative to unfencing via pcmk__order_vs_unfence(), and finally calls
 * add_expected_result() for it.
 *
 * \param[in,out] rsc   Resource to probe
 * \param[in,out] node  Node to probe the resource on
 *
 * \return The probe action returned by custom_action()
 *
 * \note NOTE(review): one statement between the custom_action() call and the
 *       unfencing ordering is absent from this listing (embedded number 138
 *       is skipped) -- confirm against the upstream source.
 */
127 static pcmk_action_t *
128 probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
129 {
130  pcmk_action_t *probe = NULL;
131  char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
132 
133  crm_debug("Scheduling probe of %s %s on %s",
134  role2text(rsc->role), rsc->id, pe__node_name(node));
135 
136  probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
137  rsc->cluster);
139 
140  pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
141  add_expected_result(probe, rsc, node);
142  return probe;
143 }
144 
/*!
 * \internal
 * \brief Schedule a probe of a resource on a node if one is needed
 *
 * Works through a series of reasons why no probe should be created (each one
 * is recorded in "reason" and traced at the no_probe label). If none applies,
 * the probe is created with probe_action() and ordered before the start and
 * the reload of the resource (or of its top-level clone, when the top-level
 * parent is a clone).
 *
 * For a resource with children, no probe is created for the resource itself;
 * the result of pcmk__probe_resource_list() on its children is returned.
 *
 * \return true if a probe was scheduled (for a resource with children: if any
 *         child reported one), otherwise false
 *
 * \note NOTE(review): several lines are absent from this listing (embedded
 *       numbers 157, 167, 251, 272 and 274 are skipped): the function name and
 *       parameter list, the condition guarding "start-up probes are
 *       disabled", one argument of the guest-stop ordering, and the first
 *       half plus the body of the "if" that precedes the start ordering. The
 *       symbol index at the end of the listing gives the prototype as
 *       "bool pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)".
 */
156 bool
158 {
159  uint32_t flags = pcmk__ar_ordered;
160  pcmk_action_t *probe = NULL;
161  pcmk_node_t *allowed = NULL;
    // NOTE(review): rsc is passed to uber_parent() before the assertion below
    // checks it for NULL -- confirm uber_parent() tolerates a NULL argument
162  pcmk_resource_t *top = uber_parent(rsc);
163  const char *reason = NULL;
164 
165  CRM_ASSERT((rsc != NULL) && (node != NULL));
166 
168  reason = "start-up probes are disabled";
169  goto no_probe;
170  }
171 
    // Restrictions that apply only when probing on a Pacemaker Remote node
172  if (pe__is_guest_or_remote_node(node)) {
173  const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
174 
175  if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
176  reason = "Pacemaker Remote nodes cannot run stonith agents";
177  goto no_probe;
178 
179  } else if (pe__is_guest_node(node)
180  && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
181  reason = "guest nodes cannot run resources containing guest nodes";
182  goto no_probe;
183 
184  } else if (rsc->is_remote_node) {
185  reason = "Pacemaker Remote nodes cannot host remote connections";
186  goto no_probe;
187  }
188  }
189 
190  // If this is a collective resource, probes are created for its children
191  if (rsc->children != NULL) {
192  return pcmk__probe_resource_list(rsc->children, node);
193  }
194 
195  if ((rsc->container != NULL) && !rsc->is_remote_node) {
196  reason = "resource is inside a container";
197  goto no_probe;
198 
199  } else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
200  reason = "resource is orphaned";
201  goto no_probe;
202 
203  } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
204  reason = "resource state is already known";
205  goto no_probe;
206  }
207 
    // Per-resource copy of the node, if the resource is allowed to run there
208  allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
209 
210  if (rsc->exclusive_discover || top->exclusive_discover) {
211  // Exclusive discovery is enabled ...
212 
213  if (allowed == NULL) {
214  // ... but this node is not allowed to run the resource
215  reason = "resource has exclusive discovery but is not allowed "
216  "on node";
217  goto no_probe;
218 
219  } else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
220  // ... but no constraint marks this node for discovery of resource
221  reason = "resource has exclusive discovery but is not enabled "
222  "on node";
223  goto no_probe;
224  }
225  }
226 
    // Fall back to the node as passed in when there is no allowed-node entry
227  if (allowed == NULL) {
228  allowed = node;
229  }
230  if (allowed->rsc_discover_mode == pcmk_probe_never) {
231  reason = "node has discovery disabled";
232  goto no_probe;
233  }
234 
235  if (pe__is_guest_node(node)) {
236  pcmk_resource_t *guest = node->details->remote_rsc->container;
237 
238  if (guest->role == pcmk_role_stopped) {
239  // The guest is stopped, so we know no resource is active there
240  reason = "node's guest is stopped";
241  probe_then_start(guest, top);
242  goto no_probe;
243 
244  } else if (guest_resource_will_stop(node)) {
245  reason = "node's guest will stop";
246 
247  // Order resource start after guest stop (in case it's restarting)
248  pcmk__new_ordering(guest,
249  pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
250  NULL, top,
252  NULL, pcmk__ar_ordered, rsc->cluster);
253  goto no_probe;
254  }
255  }
256 
257  // We've eliminated all cases where a probe is not needed, so now it is
258  probe = probe_action(rsc, node);
259 
260  /* Below, we will order the probe relative to start or reload. If this is a
261  * clone instance, the start or reload is for the entire clone rather than
262  * just the instance. Otherwise, the start or reload is for the resource
263  * itself.
264  */
265  if (!pe_rsc_is_clone(top)) {
266  top = rsc;
267  }
268 
269  /* Prevent a start if the resource can't be probed, but don't cause the
270  * resource or entire clone to stop if already active.
271  */
273  && (top->running_on == NULL)) {
275  }
276 
277  // Start or reload after probing the resource
278  pcmk__new_ordering(rsc, NULL, probe,
279  top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
280  flags, rsc->cluster);
281  pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
282  pcmk__ar_ordered, rsc->cluster);
283 
284  return true;
285 
    // Common exit for every "no probe needed" case: trace the reason
286 no_probe:
287  pe_rsc_trace(rsc,
288  "Skipping probe for %s on %s because %s",
289  rsc->id, node->details->id, reason);
290  return false;
291 }
292 
302 static bool
303 probe_needed_before_action(const pcmk_action_t *probe,
304  const pcmk_action_t *then)
305 {
306  // Probes on a node are performed after unfencing it, not before
307  if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
308  && pe__same_node(probe->node, then->node)) {
309  const char *op = g_hash_table_lookup(then->meta, "stonith_action");
310 
311  if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
312  return false;
313  }
314  }
315 
316  // Probes should be done on a node before shutting it down
317  if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
318  && (probe->node != NULL) && (then->node != NULL)
319  && !pe__same_node(probe->node, then->node)) {
320  return false;
321  }
322 
323  // Otherwise probes should always be done before any other action
324  return true;
325 }
326 
/*!
 * \internal
 * \brief Imply "probe then X" orderings from "stop then X" orderings
 *
 * Walks the scheduler's ordering constraints. For each enabled ordering whose
 * "first" side is a stop of some resource and whose "then" side belongs to a
 * different resource, every scheduled probe of the first resource is ordered
 * before each relevant "then" action, provided probe_needed_before_action()
 * agrees.
 *
 * \param[in,out] scheduler  Scheduler data
 *
 * \note NOTE(review): several lines are absent from this listing (embedded
 *       numbers 392, 399, 400, 402, 403 and 407 are skipped): part of the
 *       container check's second condition and the conditions of the
 *       "preserve certain order options/types" blocks. What exactly is
 *       preserved into order_flags cannot be determined from here.
 */
340 static void
341 add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
342 {
343  for (GList *iter = scheduler->ordering_constraints; iter != NULL;
344  iter = iter->next) {
345 
346  pe__ordering_t *order = iter->data;
347  uint32_t order_flags = pcmk__ar_ordered;
348  GList *probes = NULL;
349  GList *then_actions = NULL;
350  pcmk_action_t *first = NULL;
351  pcmk_action_t *then = NULL;
352 
353  // Skip disabled orderings
354  if (order->flags == pcmk__ar_none) {
355  continue;
356  }
357 
358  // Skip non-resource orderings, and orderings for the same resource
359  if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
360  continue;
361  }
362 
363  // Skip invalid orderings (shouldn't be possible)
364  first = order->lh_action;
365  then = order->rh_action;
366  if (((first == NULL) && (order->lh_action_task == NULL))
367  || ((then == NULL) && (order->rh_action_task == NULL))) {
368  continue;
369  }
370 
371  // Skip orderings for first actions other than stop
372  if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
373  pcmk__str_none)) {
374  continue;
375  } else if ((first == NULL)
376  && !pcmk__ends_with(order->lh_action_task,
377  "_" PCMK_ACTION_STOP "_0")) {
378  continue;
379  }
380 
381  /* Do not imply a probe ordering for a resource inside of a stopping
382  * container. Otherwise, it might introduce a transition loop, since a
383  * probe could be scheduled after the container starts again.
384  */
385  if ((order->rh_rsc != NULL)
386  && (order->lh_rsc->container == order->rh_rsc)) {
387 
388  if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
389  pcmk__str_none)) {
390  continue;
391  } else if ((then == NULL)
393  "_" PCMK_ACTION_STOP "_0")) {
394  continue;
395  }
396  }
397 
398  // Preserve certain order options for future filtering
401  }
404  }
405 
406  // Preserve certain order types for future filtering
408  || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
409  order_flags = order->flags;
410  }
411 
412  // List all scheduled probes for the first resource
413  probes = pe__resource_actions(order->lh_rsc, NULL, PCMK_ACTION_MONITOR,
414  FALSE);
415  if (probes == NULL) { // There aren't any
416  continue;
417  }
418 
419  // List all relevant "then" actions
420  if (then != NULL) {
421  then_actions = g_list_prepend(NULL, then);
422 
423  } else if (order->rh_rsc != NULL) {
424  then_actions = find_actions(order->rh_rsc->actions,
425  order->rh_action_task, NULL);
426  if (then_actions == NULL) { // There aren't any
427  g_list_free(probes);
428  continue;
429  }
430  }
431 
432  crm_trace("Implying 'probe then' orderings for '%s then %s' "
433  "(id=%d, type=%.6x)",
434  ((first == NULL)? order->lh_action_task : first->uuid),
435  ((then == NULL)? order->rh_action_task : then->uuid),
436  order->id, order->flags);
437 
    // Order every probe before every "then" action that needs to wait for it
438  for (GList *probe_iter = probes; probe_iter != NULL;
439  probe_iter = probe_iter->next) {
440 
441  pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
442 
443  for (GList *then_iter = then_actions; then_iter != NULL;
444  then_iter = then_iter->next) {
445 
        // This inner "then" shadows the outer variable of the same name
446  pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
447 
448  if (probe_needed_before_action(probe, then)) {
449  order_actions(probe, then, order_flags);
450  }
451  }
452  }
453 
    // Only the list cells are freed here, not the actions they point to
454  g_list_free(then_actions);
455  g_list_free(probes);
456  }
457 }
458 
/*!
 * \internal
 * \brief Order a probe before the starts of inactive instances of its parent
 *
 * Returns early unless \p after is a start action of the probe's top-level
 * resource (further conditions apply, see the note below). Otherwise, for
 * every action ordered after that start which is itself a start of a
 * not-currently-running resource under the same top-level resource, an
 * ordering "probe then start" is added using "flags".
 *
 * \param[in,out] probe  Probe action
 * \param[in,out] after  Wrapper of an action already ordered after \p probe
 *
 * \note NOTE(review): three lines are absent from this listing (embedded
 *       numbers 473, 486 and 488 are skipped). The first presumably declares
 *       and initializes the "flags" variable used below; the other two are
 *       part of the early-return condition. Confirm against the upstream
 *       source.
 */
469 static void
470 add_start_orderings_for_probe(pcmk_action_t *probe,
471  pcmk__related_action_t *after)
472 {
474 
475  /* Although the ordering between the probe of the clone instance and the
476  * start of its parent has been added in pcmk__probe_rsc_on_node(), we
477  * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
478  * as long as any of the clone instances are running to prevent them from
479  * being unexpectedly stopped.
480  *
481  * On the other hand, we still need to prevent any inactive instances from
482  * starting unless the probe is runnable so that we don't risk starting too
483  * many instances before we know the state on all nodes.
484  */
485  if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
487  // The order type is already enforced for its parent.
489  || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
490  || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
491  pcmk__str_none)) {
492  return;
493  }
494 
495  crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
496  "then instances of %s@%s'",
497  probe->uuid, pe__node_name(probe->node),
498  after->action->uuid, pe__node_name(after->action->node));
499 
500  for (GList *then_iter = after->action->actions_after; then_iter != NULL;
501  then_iter = then_iter->next) {
502 
503  pcmk__related_action_t *then = then_iter->data;
504 
    // Only starts of inactive resources under the same top-level resource
505  if (then->action->rsc->running_on
506  || (pe__const_top_resource(then->action->rsc, false)
507  != after->action->rsc)
508  || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
509  pcmk__str_none)) {
510  continue;
511  }
512 
513  crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
514  "then %s@%s' (type=%#.6x)",
515  probe->uuid, pe__node_name(probe->node),
516  then->action->uuid, pe__node_name(then->action->node), flags);
517 
518  /* Prevent the instance from starting if the instance can't, but don't
519  * cause any other instances to stop if already active.
520  */
521  order_actions(probe, then->action, flags);
522  }
523 
524  return;
525 }
526 
/*!
 * \internal
 * \brief Order a probe before restarts implied by an action that follows it
 *
 * Does nothing unless \p probe is a monitor action of a primitive resource
 * and \p after is non-NULL. If \p after is a start (or promote) of a
 * different primitive, the probe is ordered before that primitive's
 * non-pseudo stop (or demote) actions. The function then recurses into the
 * actions ordered after \p after, following the filtering described in the
 * comments of the final loop.
 *
 * \param[in,out] probe  Probe action
 * \param[in,out] after  Action that is ordered (directly or transitively)
 *                       after \p probe
 *
 * \note NOTE(review): four lines are absent from this listing (embedded
 *       numbers 554, 557, 599 and 605 are skipped): the condition of the
 *       "avoid running into any possible loop" guard, the statement after it
 *       (presumably marking \p after as already visited; compare
 *       clear_actions_tracking_flag()), the key of the interleave
 *       meta-attribute lookup, and one argument of
 *       pcmk__find_compatible_instance(). Confirm against the upstream source.
 */
539 static void
540 add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
541 {
542  GList *iter = NULL;
543  bool interleave = false;
544  pcmk_resource_t *compatible_rsc = NULL;
545 
546  // Validate that this is a resource probe followed by some action
547  if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
548  || (probe->rsc->variant != pcmk_rsc_variant_primitive)
549  || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
550  return;
551  }
552 
553  // Avoid running into any possible loop
555  return;
556  }
558 
559  crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
560  probe->uuid, pe__node_name(probe->node),
561  after->uuid, pe__node_name(after->node));
562 
563  /* Add restart orderings if "then" is for a different primitive.
564  * Orderings for collective resources will be added later.
565  */
566  if ((after->rsc != NULL)
567  && (after->rsc->variant == pcmk_rsc_variant_primitive)
568  && (probe->rsc != after->rsc)) {
569 
570  GList *then_actions = NULL;
571 
    // A start pairs with the resource's stops, a promote with its demotes
572  if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
573  then_actions = pe__resource_actions(after->rsc, NULL,
574  PCMK_ACTION_STOP, FALSE);
575 
576  } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
577  pcmk__str_none)) {
578  then_actions = pe__resource_actions(after->rsc, NULL,
579  PCMK_ACTION_DEMOTE, FALSE);
580  }
581 
582  for (iter = then_actions; iter != NULL; iter = iter->next) {
583  pcmk_action_t *then = (pcmk_action_t *) iter->data;
584 
585  // Skip pseudo-actions (for example, those implied by fencing)
586  if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
587  order_actions(probe, then, pcmk__ar_ordered);
588  }
589  }
590  g_list_free(then_actions);
591  }
592 
593  /* Detect whether "then" is an interleaved clone action. For these, we want
594  * to add orderings only for the relevant instance.
595  */
596  if ((after->rsc != NULL)
597  && (after->rsc->variant > pcmk_rsc_variant_group)) {
598  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
600 
601  interleave = crm_is_true(interleave_s);
602  if (interleave) {
603  compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
604  after->rsc,
606  false);
607  }
608  }
609 
610  /* Now recursively do the same for all actions ordered after "then". This
611  * also handles collective resources since the collective action will be
612  * ordered before its individual instances' actions.
613  */
614  for (iter = after->actions_after; iter != NULL; iter = iter->next) {
615  pcmk__related_action_t *after_wrapper = iter->data;
616 
617  /* pcmk__ar_first_implies_then is the reason why a required A.start
618  * implies/enforces B.start to be required too, which is the cause of
619  * B.restart/re-promote.
620  *
621  * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
622  * only used for unfencing case, which tends to introduce transition
623  * loops...
624  */
625  if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
626  /* The order type between a group/clone and its child such as
627  * B.start-> B_child.start is:
628  * pcmk__ar_then_implies_first_graphed
629  * |pcmk__ar_unrunnable_first_blocks
630  *
631  * Proceed through the ordering chain and build dependencies with
632  * its children.
633  */
634  if ((after->rsc == NULL)
635  || (after->rsc->variant < pcmk_rsc_variant_group)
636  || (probe->rsc->parent == after->rsc)
637  || (after_wrapper->action->rsc == NULL)
638  || (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
639  || (after->rsc != after_wrapper->action->rsc->parent)) {
640  continue;
641  }
642 
643  /* Proceed to the children of a group or a non-interleaved clone.
644  * For an interleaved clone, proceed only to the relevant child.
645  */
646  if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
647  && ((compatible_rsc == NULL)
648  || (compatible_rsc != after_wrapper->action->rsc))) {
649  continue;
650  }
651  }
652 
653  crm_trace("Recursively adding probe restart orderings for "
654  "'%s@%s then %s@%s' (type=%#.6x)",
655  after->uuid, pe__node_name(after->node),
656  after_wrapper->action->uuid,
657  pe__node_name(after_wrapper->action->node),
658  after_wrapper->type);
659 
660  add_restart_orderings_for_probe(probe, after_wrapper->action);
661  }
662 }
663 
/*!
 * \internal
 * \brief Visit every scheduled action to reset its tracking state
 *
 * Iterates over all actions in the scheduler's action list.
 *
 * \param[in,out] scheduler  Scheduler data
 *
 * \note NOTE(review): the statement inside the loop is absent from this
 *       listing (embedded number 676 is skipped). Judging by the function
 *       name, it presumably clears a per-action "already processed" flag on
 *       "action" -- confirm against the upstream source.
 */
670 static void
671 clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
672 {
673  for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
674  pcmk_action_t *action = iter->data;
675 
677  }
678 }
679 
687 static void
688 add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
689 {
690  pcmk_resource_t *rsc = data;
691  GList *probes = NULL;
692 
693  // For collective resources, order each instance recursively
694  if (rsc->variant != pcmk_rsc_variant_primitive) {
695  g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
696  NULL);
697  return;
698  }
699 
700  // Find all probes for given resource
701  probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
702 
703  // Add probe restart orderings for each probe found
704  for (GList *iter = probes; iter != NULL; iter = iter->next) {
705  pcmk_action_t *probe = (pcmk_action_t *) iter->data;
706 
707  for (GList *then_iter = probe->actions_after; then_iter != NULL;
708  then_iter = then_iter->next) {
709 
710  pcmk__related_action_t *then = then_iter->data;
711 
712  add_start_orderings_for_probe(probe, then);
713  add_restart_orderings_for_probe(probe, then->action);
714  clear_actions_tracking_flag(rsc->cluster);
715  }
716  }
717 
718  g_list_free(probes);
719 }
720 
/*!
 * \internal
 * \brief Placeholder for ordering "A then B" starts before probes of B
 *
 * The entire body sits inside "#if 0", so as compiled this function does
 * nothing. The disabled code and its explanatory comment are kept to record
 * why the ordering is not implemented.
 *
 * \param[in,out] scheduler  Scheduler data (unused while the body is disabled)
 *
 * \note NOTE(review): within the disabled code, the list assigned to "probes"
 *       is never passed to g_list_free(), unlike the other uses of
 *       pe__resource_actions() in this file. Fix that if the code is ever
 *       re-enabled.
 */
729 static void
730 order_then_probes(pcmk_scheduler_t *scheduler)
731 {
732 #if 0
733  /* Given an ordering "A then B", we would prefer to wait for A to be started
734  * before probing B.
735  *
736  * For example, if A is a filesystem which B can't even run without, it
737  * would be helpful if the author of B's agent could assume that A is
738  * running before B.monitor will be called.
739  *
740  * However, we can't _only_ probe after A is running, otherwise we wouldn't
741  * detect the state of B if A could not be started. We can't even do an
742  * opportunistic version of this, because B may be moving:
743  *
744  * A.stop -> A.start -> B.probe -> B.stop -> B.start
745  *
746  * and if we add B.stop -> A.stop here, we get a loop:
747  *
748  * A.stop -> A.start -> B.probe -> B.stop -> A.stop
749  *
750  * We could kill the "B.probe -> B.stop" dependency, but that could mean
751  * stopping B "too" soon, because B.start must wait for the probe, and
752  * we don't want to stop B if we can't start it.
753  *
754  * We could add the ordering only if A is an anonymous clone with
755  * clone-max == node-max (since we'll never be moving it). However, we could
756  * still be stopping one instance at the same time as starting another.
757  *
758  * The complexity of checking for allowed conditions combined with the ever
759  * narrowing use case suggests that this code should remain disabled until
760  * someone gets smarter.
761  */
762  for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
763  pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
764 
765  pcmk_action_t *start = NULL;
766  GList *actions = NULL;
767  GList *probes = NULL;
768 
769  actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
770 
771  if (actions) {
772  start = actions->data;
773  g_list_free(actions);
774  }
775 
776  if (start == NULL) {
777  crm_err("No start action for %s", rsc->id);
778  continue;
779  }
780 
781  probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
782 
783  for (actions = start->actions_before; actions != NULL;
784  actions = actions->next) {
785 
786  pcmk__related_action_t *before = actions->data;
787 
788  pcmk_action_t *first = before->action;
789  pcmk_resource_t *first_rsc = first->rsc;
790 
791  if (first->required_runnable_before) {
792  for (GList *clone_actions = first->actions_before;
793  clone_actions != NULL;
794  clone_actions = clone_actions->next) {
795 
796  before = clone_actions->data;
797 
798  crm_trace("Testing '%s then %s' for %s",
799  first->uuid, before->action->uuid, start->uuid);
800 
801  CRM_ASSERT(before->action->rsc != NULL);
802  first_rsc = before->action->rsc;
803  break;
804  }
805 
806  } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
807  pcmk__str_none)) {
808  crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
809  }
810 
811  if (first_rsc == NULL) {
812  continue;
813 
814  } else if (pe__const_top_resource(first_rsc, false)
815  == pe__const_top_resource(start->rsc, false)) {
816  crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
817  continue;
818 
819  } else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc,
820  false))) {
821  crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
822  continue;
823  }
824 
825  crm_err("Applying %s before %s %d", first->uuid, start->uuid,
826  pe__const_top_resource(first_rsc, false)->variant);
827 
828  for (GList *probe_iter = probes; probe_iter != NULL;
829  probe_iter = probe_iter->next) {
830 
831  pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
832 
833  crm_err("Ordering %s before %s", first->uuid, probe->uuid);
834  order_actions(first, probe, pcmk__ar_ordered);
835  }
836  }
837  }
838 #endif
839 }
840 
/*!
 * \internal
 * \brief Add all probe-related orderings for the scheduler's resources
 *
 * Runs add_start_restart_orderings_for_rsc() on every resource in the
 * scheduler's resource list, then add_probe_orderings_for_stops(), then
 * order_then_probes().
 *
 * \note NOTE(review): the line holding the function name and parameter list
 *       (embedded number 842) is missing from this listing. The symbol index
 *       at the end of the listing gives the prototype as
 *       "void pcmk__order_probes(pcmk_scheduler_t *scheduler)", which matches
 *       the name used in the body.
 */
841 void
843 {
844  // Add orderings for "probe then X"
845  g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
846  NULL);
847  add_probe_orderings_for_stops(scheduler);
848 
849  order_then_probes(scheduler);
850 }
851 
/*!
 * \internal
 * \brief Schedule resource probes on every eligible node
 *
 * Iterates over the scheduler's nodes and skips those that are offline,
 * unclean, or have resource discovery disabled. An offline node for which
 * pcmk__is_failed_remote_node() is true is additionally passed to
 * pe_fence_node(). A node whose CRM_OP_PROBED attribute is set to a value that
 * is not true gets a CRM_OP_REPROBE action instead of individual probes.
 *
 * \note NOTE(review): several lines are absent from this listing (embedded
 *       numbers 861, 893, 896, 897 and 902 are skipped): the function name and
 *       parameter list, the start of the key argument to custom_action(), two
 *       statements after the reprobe action is created, and the statement
 *       that probes each resource on the node (presumably a call to
 *       pcmk__probe_resource_list()). The symbol index at the end of the
 *       listing gives the prototype as
 *       "void pcmk__schedule_probes(pcmk_scheduler_t *scheduler)".
 */
860 void
862 {
863  // Schedule probes on each node in the cluster as needed
864  for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
865  pcmk_node_t *node = (pcmk_node_t *) iter->data;
866  const char *probed = NULL;
867 
868  if (!node->details->online) { // Don't probe offline nodes
869  if (pcmk__is_failed_remote_node(node)) {
870  pe_fence_node(scheduler, node,
871  "the connection is unrecoverable", FALSE);
872  }
873  continue;
874 
875  } else if (node->details->unclean) { // ... or nodes that need fencing
876  continue;
877 
878  } else if (!node->details->rsc_discovery_enabled) {
879  // The user requested that probes not be done on this node
880  continue;
881  }
882 
883  /* This is no longer needed for live clusters, since the probe_complete
884  * node attribute will never be in the CIB. However this is still useful
885  * for processing old saved CIBs (< 1.1.14), including the
886  * reprobe-target_rc regression test.
887  */
888  probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
889  if (probed != NULL && crm_is_true(probed) == FALSE) {
890  pcmk_action_t *probe_op = NULL;
891 
892  probe_op = custom_action(NULL,
894  node->details->uname),
895  CRM_OP_REPROBE, node, FALSE, scheduler);
898  continue;
899  }
900 
901  // Probe each resource in the cluster on this node, as needed
903  }
904 }
pcmk_assignment_methods_t * cmds
Resource assignment methods.
Definition: resources.h:417
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:962
pcmk_action_t * lh_action
Definition: internal.h:172
pcmk_resource_t * pe__resource_contains_guest_node(const pcmk_scheduler_t *scheduler, const pcmk_resource_t *rsc)
Definition: remote.c:66
Relation applies only if actions are on same node.
A dumping ground.
GHashTable * known_on
Nodes where resource has been probed (key is node ID, not name)
Definition: resources.h:463
'then' is runnable (and migratable) only if 'first' is runnable
bool pe__is_guest_or_remote_node(const pcmk_node_t *node)
Definition: remote.c:41
pcmk_scheduler_t * cluster
Cluster that resource is part of.
Definition: resources.h:412
Actions are ordered if on same node (or migration target for migrate_to)
char data[0]
Definition: cpg.c:55
Whether action should not be executed.
Definition: actions.h:244
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:76
const char * pe_node_attribute_raw(const pcmk_node_t *node, const char *name)
Definition: common.c:621
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:936
pcmk_action_t * rh_action
Definition: internal.h:177
Stopped.
Definition: roles.h:29
pcmk_resource_t rsc2
pcmk_action_t * action
Definition: actions.h:385
Whether action has already been processed by a recursive procedure.
Definition: actions.h:272
enum rsc_role_e role
Resource's current role.
Definition: resources.h:468
#define CRM_OP_REPROBE
Definition: crm.h:151
GList * children
Resource's child resources, if any.
Definition: resources.h:475
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition: utils.c:450
#define PCMK_ACTION_ON
Definition: actions.h:63
Service active and promoted.
Definition: results.h:250
enum rsc_role_e next_role
Resource's scheduled next role.
Definition: resources.h:469
gboolean exclusive_discover
Whether exclusive probing is enabled.
Definition: resources.h:433
#define reload_key(rsc)
Definition: internal.h:383
Implementation of pcmk_action_t.
Definition: actions.h:390
#define PCMK_ACTION_MONITOR
Definition: actions.h:59
GHashTable * meta
Resource's meta-attributes.
Definition: resources.h:471
bool pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
G_GNUC_INTERNAL pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current)
#define PCMK_ACTION_DO_SHUTDOWN
Definition: actions.h:51
Promoted.
Definition: roles.h:32
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:110
GList * actions
Scheduled actions.
Definition: scheduler.h:204
Group resource.
Definition: resources.h:35
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1588
pcmk_resource_t * container
Resource containing this one, if any.
Definition: resources.h:480
Always probe resource on node.
Definition: nodes.h:50
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:533
Implementation of pcmk_scheduler_t.
Definition: scheduler.h:172
gboolean remote_requires_reset
Definition: nodes.h:88
const char * action
Definition: pcmk_fence.c:30
GList * resources
Resources in cluster.
Definition: scheduler.h:196
GList * nodes
Nodes in cluster.
Definition: scheduler.h:195
const char * role2text(enum rsc_role_e role)
Definition: common.c:458
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
pcmk_resource_t * parent
Resource's parent resource, if any.
Definition: resources.h:413
pcmk_node_t * node
Node to execute action on, if any.
Definition: actions.h:401
Implementation of pcmk_resource_t.
Definition: resources.h:399
#define crm_debug(fmt, args...)
Definition: logging.h:386
Actions are ordered (optionally, if no other flags are set)
Primitive resource.
Definition: resources.h:34
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:447
pcmk_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition: status.c:448
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:167
char * task
Action name.
Definition: actions.h:403
Whether resource is considered failed.
Definition: resources.h:151
GList * actions_after
For Pacemaker use only.
Definition: actions.h:431
#define pe__clear_action_flags(action, flags_to_clear)
Definition: internal.h:85
#define crm_trace(fmt, args...)
Definition: logging.h:387
#define PCMK_RESOURCE_CLASS_STONITH
Definition: agents.h:31
pcmk_node_t * pe_find_node(const GList *node_list, const char *node_name)
Find a node by name in a list of nodes.
Definition: status.c:473
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
GHashTable * meta
Meta-attributes relevant to action.
Definition: actions.h:414
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:99
struct pe_node_shared_s * details
Basic node information.
Definition: nodes.h:134
#define PCMK_ACTION_START
Definition: actions.h:71
unsigned long long flags
Group of enum pcmk_rsc_flags.
Definition: resources.h:429
const char * uname
Node name in cluster.
Definition: nodes.h:68
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:418
Ordering applies only if 'first' is required and on same node as 'then'.
#define PCMK_ACTION_STOP
Definition: actions.h:74
GList * actions
Definition: resources.h:447
#define PCMK_ACTION_STONITH
Definition: actions.h:73
Never probe resource on node.
Definition: nodes.h:51
char * uuid
Action key.
Definition: actions.h:404
void pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:42
void pcmk__order_probes(pcmk_scheduler_t *scheduler)
Implementation of pcmk_node_t.
Definition: nodes.h:130
enum pe_obj_types variant
Resource variant.
Definition: resources.h:414
bool(* create_probe)(pcmk_resource_t *rsc, pcmk_node_t *node)
int rsc_discover_mode
Probe mode (enum pe_discover_e)
Definition: nodes.h:137
pcmk_resource_t rsc1
pcmk_resource_t * lh_rsc
Definition: internal.h:171
Service safely stopped.
Definition: results.h:249
const char * id
Node ID at the cluster layer.
Definition: nodes.h:67
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
Definition: pe_actions.c:1117
void pe__add_action_expected_result(pcmk_action_t *action, int expected_result)
Definition: pe_actions.c:1861
gboolean rsc_discovery_enabled
Whether probes are allowed on node.
Definition: nodes.h:82
bool pe__is_guest_node(const pcmk_node_t *node)
Definition: remote.c:33
Cluster status and scheduling.
gboolean is_remote_node
Whether this is a remote connection.
Definition: resources.h:432
GList * ordering_constraints
Ordering constraints.
Definition: scheduler.h:198
Whether action is runnable.
Definition: actions.h:241
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:508
#define crm_err(fmt, args...)
Definition: logging.h:381
pcmk_scheduler_t * scheduler
Whether action does not require invoking an agent.
Definition: actions.h:238
GList * find_actions(GList *input, const char *key, const pcmk_node_t *on_node)
Definition: pe_actions.c:1515
#define CRM_ASSERT(expr)
Definition: results.h:42
Relation applies only if 'first' cannot be part of a live migration.
bool pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
pcmk_node_t * allocated_to
Node resource is assigned to.
Definition: resources.h:451
#define PCMK_ACTION_PROMOTE
Definition: actions.h:65
GList * running_on
Nodes where resource may be active.
Definition: resources.h:460
enum pe_action_flags flags
Group of enum pe_action_flags.
Definition: actions.h:409
#define CRM_OP_PROBED
Definition: crm.h:150
gboolean crm_is_true(const char *s)
Definition: strings.c:416
pcmk_resource_t * rsc
Resource to apply action to, if any.
Definition: actions.h:400
Resource role is unknown.
Definition: roles.h:28
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:37
#define pe__set_order_flags(order_flags, flags_to_set)
Definition: internal.h:128
unsigned long long flags
Group of enum pcmk_scheduler_flags.
Definition: scheduler.h:183
#define XML_RSC_ATTR_INTERLEAVE
Definition: msg_xml.h:245
pcmk_resource_t * rh_rsc
Definition: internal.h:176
enum pe_ordering type
Definition: actions.h:380
gboolean unclean
Whether node requires fencing.
Definition: nodes.h:76
Whether resource has been removed from the configuration.
Definition: resources.h:103
gboolean online
Whether online.
Definition: nodes.h:72
uint64_t flags
Definition: remote.c:215
GList * actions_before
For Pacemaker use only.
Definition: actions.h:430
int required_runnable_before
Definition: actions.h:427
G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pcmk_node_t *node)
pcmk_resource_t * remote_rsc
Remote connection resource for node, if it is a Pacemaker Remote node.
Definition: nodes.h:111
No relation (compare with equality rather than bit set)
#define XML_AGENT_ATTR_CLASS
Definition: msg_xml.h:280
char * id
Resource ID in configuration.
Definition: resources.h:400
GHashTable * allowed_nodes
Nodes where resource may run (key is node ID, not name)
Definition: resources.h:466