pacemaker  2.1.8-3980678f03
Scalable High-Availability cluster resource manager
pcmk_sched_probes.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
/*!
 * \internal
 * \brief Decide which result a probe of a resource on a node should expect
 *
 * Looks up \p node (by node ID) in \p rsc's running_on list, then branches on
 * whether the resource is believed inactive there or currently has the
 * promoted role.
 *
 * \param probe  Probe action (not referenced in the lines visible here)
 * \param rsc    Resource being probed
 * \param node   Node being probed
 *
 * NOTE(review): the statement inside each branch (source lines 36 and 39) is
 * missing from this listing; presumably each registers an expected result on
 * \p probe -- confirm against the original file.
 */
27 static void
28 add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
29  const pcmk_node_t *node)
30 {
31  // Check whether resource is currently active on node
32  pcmk_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
33 
34  // The expected result is what we think the resource's current state is
35  if (running == NULL) {
37 
38  } else if (rsc->role == pcmk_role_promoted) {
40  }
41 }
42 
/*!
 * \brief Ask each resource in a list to create its probe on a node
 *
 * Every element of the list is visited even after one has reported success;
 * the results are OR-ed together.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (source line 53) is missing from this listing. The body uses a GList named
 * "rscs" and a node named "node", which presumably are the parameters --
 * confirm against the original file.
 *
 * \return true if any resource's create_probe() method returned true,
 *         otherwise false
 */
52 bool
54 {
55  bool any_created = false;
56 
57  for (GList *iter = rscs; iter != NULL; iter = iter->next) {
58  pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
59 
60  if (rsc->cmds->create_probe(rsc, node)) {
61  any_created = true;
62  }
63  }
64  return any_created;
65 }
66 
/*!
 * \internal
 * \brief Act only when a resource's state on its assigned node is unknown
 *
 * The body runs only if \p rsc1 has been assigned a node and that node's ID
 * has no entry in rsc1->known_on.
 *
 * \param rsc1  Resource whose assignment and known_on table are checked
 * \param rsc2  Second resource (not referenced in the lines visible here)
 *
 * NOTE(review): most of the call inside the condition (source lines 81, 82,
 * 84 and 86) is missing from this listing. Judging by the function name it
 * presumably orders a probe of rsc1 before the start of rsc2 -- confirm
 * against the original file.
 */
74 static void
75 probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
76 {
77  if ((rsc1->allocated_to != NULL)
78  && (g_hash_table_lookup(rsc1->known_on,
79  rsc1->allocated_to->details->id) == NULL)) {
80 
83  NULL,
85  NULL,
87  }
88 }
89 
98 static bool
99 guest_resource_will_stop(const pcmk_node_t *node)
100 {
101  const pcmk_resource_t *guest_rsc = node->details->remote_rsc->container;
102 
103  /* Ideally, we'd check whether the guest has a required stop, but that
104  * information doesn't exist yet, so approximate it ...
105  */
106  return node->details->remote_requires_reset
107  || node->details->unclean
108  || pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
109  || (guest_rsc->next_role == pcmk_role_stopped)
110 
111  // Guest is moving
112  || ((guest_rsc->role > pcmk_role_stopped)
113  && (guest_rsc->allocated_to != NULL)
114  && (pcmk__find_node_in_list(guest_rsc->running_on,
115  guest_rsc->allocated_to->details->uname) == NULL));
116 }
117 
/*!
 * \internal
 * \brief Create a probe (a monitor action with interval 0) for a resource
 *
 * The action is requested from custom_action() with FALSE for its "optional"
 * argument, then handed to pcmk__order_vs_unfence() with pcmk__ar_ordered
 * and to add_expected_result().
 *
 * \param rsc   Resource to probe
 * \param node  Node to probe on
 *
 * \return Probe action returned by custom_action()
 *
 * NOTE(review): source line 138 (between the custom_action() call and the
 * unfencing ordering) is missing from this listing.
 */
127 static pcmk_action_t *
128 probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
129 {
130  pcmk_action_t *probe = NULL;
131  char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
132 
133  crm_debug("Scheduling probe of %s %s on %s",
134  pcmk_role_text(rsc->role), rsc->id, pcmk__node_name(node));
135 
136  probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
137  rsc->cluster);
139 
140  pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
141  add_expected_result(probe, rsc, node);
142  return probe;
143 }
144 
/*!
 * \brief Schedule a probe of a resource on a node, if one is needed
 *
 * Works through a series of reasons why a probe is unnecessary or not
 * possible. Each of those sets a reason string and jumps to the no_probe
 * label, which logs the reason at trace level and returns false. A resource
 * with children delegates to pcmk__probe_resource_list() instead. If nothing
 * rules the probe out, it is created with probe_action() and ordered before
 * the start action, and before the action named by reload_key(rsc), of
 * either the resource itself or (when its top-level parent is a clone) that
 * clone.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (source line 157) is missing from this listing. The body uses "rsc" and
 * "node", which presumably are the parameters -- confirm against the
 * original file.
 *
 * \return true if a probe was scheduled here, the result of
 *         pcmk__probe_resource_list() for a resource with children,
 *         otherwise false
 */
156 bool
158 {
159  uint32_t flags = pcmk__ar_ordered;
160  pcmk_action_t *probe = NULL;
161  pcmk_node_t *allowed = NULL;
162  pcmk_resource_t *top = uber_parent(rsc);
163  const char *reason = NULL;
164 
 // NOTE(review): uber_parent(rsc) in the initializer above runs before this
 // assertion has checked rsc for NULL
165  CRM_ASSERT((rsc != NULL) && (node != NULL));
166 
 // NOTE(review): the condition opening this block (source line 167) is
 // missing from this listing
168  reason = "start-up probes are disabled";
169  goto no_probe;
170  }
171 
172  if (pcmk__is_pacemaker_remote_node(node)) {
173  const char *class = crm_element_value(rsc->xml, PCMK_XA_CLASS);
174 
175  if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
176  reason = "Pacemaker Remote nodes cannot run stonith agents";
177  goto no_probe;
178 
179  } else if (pcmk__is_guest_or_bundle_node(node)
180  && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
181  reason = "guest nodes cannot run resources containing guest nodes";
182  goto no_probe;
183 
184  } else if (rsc->is_remote_node) {
185  reason = "Pacemaker Remote nodes cannot host remote connections";
186  goto no_probe;
187  }
188  }
189 
190  // If this is a collective resource, probes are created for its children
191  if (rsc->children != NULL) {
192  return pcmk__probe_resource_list(rsc->children, node);
193  }
194 
195  if ((rsc->container != NULL) && !rsc->is_remote_node) {
196  reason = "resource is inside a container";
197  goto no_probe;
198 
199  } else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
200  reason = "resource is orphaned";
201  goto no_probe;
202 
203  } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
204  reason = "resource state is already known";
205  goto no_probe;
206  }
207 
208  allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
209 
210  if (rsc->exclusive_discover || top->exclusive_discover) {
211  // Exclusive discovery is enabled ...
212 
213  if (allowed == NULL) {
214  // ... but this node is not allowed to run the resource
215  reason = "resource has exclusive discovery but is not allowed "
216  "on node";
217  goto no_probe;
218 
219  } else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
220  // ... but no constraint marks this node for discovery of resource
221  reason = "resource has exclusive discovery but is not enabled "
222  "on node";
223  goto no_probe;
224  }
225  }
226 
 // With no entry in allowed_nodes, fall back to the node's own discovery mode
227  if (allowed == NULL) {
228  allowed = node;
229  }
230  if (allowed->rsc_discover_mode == pcmk_probe_never) {
231  reason = "node has discovery disabled";
232  goto no_probe;
233  }
234 
235  if (pcmk__is_guest_or_bundle_node(node)) {
236  pcmk_resource_t *guest = node->details->remote_rsc->container;
237 
238  if (guest->role == pcmk_role_stopped) {
239  // The guest is stopped, so we know no resource is active there
240  reason = "node's guest is stopped";
241  probe_then_start(guest, top);
242  goto no_probe;
243 
244  } else if (guest_resource_will_stop(node)) {
245  reason = "node's guest will stop";
246 
247  // Order resource start after guest stop (in case it's restarting)
 // NOTE(review): one argument line of this call (source line 251) is
 // missing from this listing
248  pcmk__new_ordering(guest,
249  pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
250  NULL, top,
252  NULL, pcmk__ar_ordered, rsc->cluster);
253  goto no_probe;
254  }
255  }
256 
257  // We've eliminated all cases where a probe is not needed, so now it is
258  probe = probe_action(rsc, node);
259 
260  /* Below, we will order the probe relative to start or reload. If this is a
261  * clone instance, the start or reload is for the entire clone rather than
262  * just the instance. Otherwise, the start or reload is for the resource
263  * itself.
264  */
265  if (!pcmk__is_clone(top)) {
266  top = rsc;
267  }
268 
269  /* Prevent a start if the resource can't be probed, but don't cause the
270  * resource or entire clone to stop if already active.
271  */
 // NOTE(review): the start of this condition (source line 272) and the
 // statement it guards (source line 274) are missing from this listing;
 // presumably the guarded statement adds to "flags" -- confirm
273  && (top->running_on == NULL)) {
275  }
276 
277  // Start or reload after probing the resource
278  pcmk__new_ordering(rsc, NULL, probe,
279  top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
280  flags, rsc->cluster);
281  pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
282  pcmk__ar_ordered, rsc->cluster);
283 
284  return true;
285 
 // Common exit for every "probe not needed" case above
286 no_probe:
287  pcmk__rsc_trace(rsc,
288  "Skipping probe for %s on %s because %s",
289  rsc->id, node->details->id, reason);
290  return false;
291 }
292 
/*!
 * \internal
 * \brief Check whether a probe should be ordered before another action
 *
 * \param probe  Probe action
 * \param then   Action that might need to wait for the probe
 *
 * \return false if \p then is a fencing action on the probe's node whose
 *         looked-up meta value matches PCMK_ACTION_ON (case-insensitively),
 *         or if \p then is a shutdown action and both actions have nodes
 *         that differ; otherwise true
 */
302 static bool
303 probe_needed_before_action(const pcmk_action_t *probe,
304  const pcmk_action_t *then)
305 {
306  // Probes on a node are performed after unfencing it, not before
307  if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
308  && pcmk__same_node(probe->node, then->node)) {
 // NOTE(review): the key argument of this lookup (source line 310) is
 // missing from this listing
309  const char *op = g_hash_table_lookup(then->meta,
311 
312  if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
313  return false;
314  }
315  }
316 
317  // Probes should be done on a node before shutting it down
 // (so a shutdown of a different node does not need to wait for this probe)
318  if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
319  && (probe->node != NULL) && (then->node != NULL)
320  && !pcmk__same_node(probe->node, then->node)) {
321  return false;
322  }
323 
324  // Otherwise probes should always be done before any other action
325  return true;
326 }
327 
/*!
 * \internal
 * \brief Imply "probe then X" orderings from existing "stop then X" orderings
 *
 * Walks scheduler->ordering_constraints. For each enabled ordering between
 * two different resources whose first action is a stop, every probe listed
 * for the first resource is ordered before each relevant "then" action,
 * provided probe_needed_before_action() returns true for the pair.
 *
 * \param scheduler  Scheduler data whose ordering constraints are examined
 */
341 static void
342 add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
343 {
344  for (GList *iter = scheduler->ordering_constraints; iter != NULL;
345  iter = iter->next) {
346 
347  pcmk__action_relation_t *order = iter->data;
348  uint32_t order_flags = pcmk__ar_ordered;
349  GList *probes = NULL;
350  GList *then_actions = NULL;
351  pcmk_action_t *first = NULL;
352  pcmk_action_t *then = NULL;
353 
354  // Skip disabled orderings
355  if (order->flags == pcmk__ar_none) {
356  continue;
357  }
358 
359  // Skip non-resource orderings, and orderings for the same resource
360  if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) {
361  continue;
362  }
363 
364  // Skip invalid orderings (shouldn't be possible)
365  first = order->action1;
366  then = order->action2;
367  if (((first == NULL) && (order->task1 == NULL))
368  || ((then == NULL) && (order->task2 == NULL))) {
369  continue;
370  }
371 
372  // Skip orderings for first actions other than stop
 // (checked by action task if there is an action object, else by key suffix)
373  if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
374  pcmk__str_none)) {
375  continue;
376  } else if ((first == NULL)
377  && !pcmk__ends_with(order->task1,
378  "_" PCMK_ACTION_STOP "_0")) {
379  continue;
380  }
381 
382  /* Do not imply a probe ordering for a resource inside of a stopping
383  * container. Otherwise, it might introduce a transition loop, since a
384  * probe could be scheduled after the container starts again.
385  */
386  if ((order->rsc2 != NULL) && (order->rsc1->container == order->rsc2)) {
387 
388  if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
389  pcmk__str_none)) {
390  continue;
391  } else if ((then == NULL)
392  && pcmk__ends_with(order->task2,
393  "_" PCMK_ACTION_STOP "_0")) {
394  continue;
395  }
396  }
397 
398  // Preserve certain order options for future filtering
 // NOTE(review): source lines 399, 401, 403 and 404 are missing from this
 // listing, so the conditions and flag names here cannot be seen
400  pcmk__set_relation_flags(order_flags,
402  }
405  }
406 
407  // Preserve certain order types for future filtering
 // NOTE(review): the start of this condition (source line 408) is missing
 // from this listing
409  || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
410  order_flags = order->flags;
411  }
412 
413  // List all scheduled probes for the first resource
414  probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR,
415  FALSE);
416  if (probes == NULL) { // There aren't any
417  continue;
418  }
419 
420  // List all relevant "then" actions
421  if (then != NULL) {
422  then_actions = g_list_prepend(NULL, then);
423 
424  } else if (order->rsc2 != NULL) {
425  then_actions = find_actions(order->rsc2->actions, order->task2,
426  NULL);
427  if (then_actions == NULL) { // There aren't any
428  g_list_free(probes);
429  continue;
430  }
431  }
432 
433  crm_trace("Implying 'probe then' orderings for '%s then %s' "
434  "(id=%d, type=%.6x)",
435  ((first == NULL)? order->task1 : first->uuid),
436  ((then == NULL)? order->task2 : then->uuid),
437  order->id, order->flags);
438 
439  for (GList *probe_iter = probes; probe_iter != NULL;
440  probe_iter = probe_iter->next) {
441 
442  pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
443 
444  for (GList *then_iter = then_actions; then_iter != NULL;
445  then_iter = then_iter->next) {
446 
 // NOTE(review): this declaration shadows the outer "then"
447  pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
448 
449  if (probe_needed_before_action(probe, then)) {
450  order_actions(probe, then, order_flags);
451  }
452  }
453  }
454 
 // Only the list cells are freed here, not the actions they point to
455  g_list_free(then_actions);
456  g_list_free(probes);
457  }
458 }
459 
/*!
 * \internal
 * \brief Order a probe before the starts of inactive instances of a resource
 *
 * Unless one of the early-return conditions holds, every start action that
 * follows \p after's action, belongs to a currently inactive resource, and
 * has \p after's resource as its top-level resource is ordered after
 * \p probe using "flags".
 *
 * \param probe  Probe action
 * \param after  Relation wrapper for an action ordered after the probe
 *
 * NOTE(review): source lines 474, 487 and 489 are missing from this listing.
 * "flags" is used below but its declaration is not visible (presumably on
 * line 474), and part of the early-return condition is missing too.
 */
470 static void
471 add_start_orderings_for_probe(pcmk_action_t *probe,
472  pcmk__related_action_t *after)
473 {
475 
476  /* Although the ordering between the probe of the clone instance and the
477  * start of its parent has been added in pcmk__probe_rsc_on_node(), we
478  * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
479  * as long as any of the clone instances are running to prevent them from
480  * being unexpectedly stopped.
481  *
482  * On the other hand, we still need to prevent any inactive instances from
483  * starting unless the probe is runnable so that we don't risk starting too
484  * many instances before we know the state on all nodes.
485  */
486  if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
488  // The order type is already enforced for its parent.
490  || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
491  || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
492  pcmk__str_none)) {
493  return;
494  }
495 
496  crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
497  "then instances of %s@%s'",
498  probe->uuid, pcmk__node_name(probe->node),
499  after->action->uuid, pcmk__node_name(after->action->node));
500 
501  for (GList *then_iter = after->action->actions_after; then_iter != NULL;
502  then_iter = then_iter->next) {
503 
504  pcmk__related_action_t *then = then_iter->data;
505 
 // Skip active resources, actions whose top-level resource differs, and
 // anything that is not a start
506  if (then->action->rsc->running_on
507  || (pe__const_top_resource(then->action->rsc, false)
508  != after->action->rsc)
509  || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
510  pcmk__str_none)) {
511  continue;
512  }
513 
514  crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
515  "then %s@%s' (type=%#.6x)",
516  probe->uuid, pcmk__node_name(probe->node),
517  then->action->uuid, pcmk__node_name(then->action->node),
518  flags);
519 
520  /* Prevent the instance from starting if the probe can't run, but don't
521  * cause any other instances to stop if already active.
522  */
523  order_actions(probe, then->action, flags);
524  }
525 
526  return;
527 }
528 
/*!
 * \internal
 * \brief Order a probe before stops/demotes implied by the actions after it
 *
 * If \p after is a start (or promote) of a primitive other than the probed
 * one, the probe is ordered before that primitive's non-pseudo stop (or
 * demote) actions. The function then calls itself for the actions in
 * after->actions_after, subject to the filtering in the final loop.
 *
 * \param probe  Probe action; ignored unless it is a monitor of a primitive
 * \param after  Action ordered (directly or transitively) after the probe
 *
 * NOTE(review): source lines 555, 558, 598 and 604 are missing from this
 * listing (the loop-avoidance condition, the statement after it, and one
 * argument each of two calls).
 */
541 static void
542 add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
543 {
544  GList *iter = NULL;
545  bool interleave = false;
546  pcmk_resource_t *compatible_rsc = NULL;
547 
548  // Validate that this is a resource probe followed by some action
549  if ((after == NULL) || (probe == NULL) || !pcmk__is_primitive(probe->rsc)
550  || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
551  return;
552  }
553 
554  // Avoid running into any possible loop
 // NOTE(review): the condition (source line 555) and the statement following
 // this block (source line 558) are missing from this listing; presumably
 // they test and set a per-action tracking flag -- confirm
556  return;
557  }
559 
560  crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
561  probe->uuid, pcmk__node_name(probe->node),
562  after->uuid, pcmk__node_name(after->node));
563 
564  /* Add restart orderings if "then" is for a different primitive.
565  * Orderings for collective resources will be added later.
566  */
567  if (pcmk__is_primitive(after->rsc) && (probe->rsc != after->rsc)) {
568 
569  GList *then_actions = NULL;
570 
 // A start pairs with the resource's stops, a promote with its demotes; for
 // any other task the list stays NULL and the loop below does nothing
571  if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
572  then_actions = pe__resource_actions(after->rsc, NULL,
573  PCMK_ACTION_STOP, FALSE);
574 
575  } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
576  pcmk__str_none)) {
577  then_actions = pe__resource_actions(after->rsc, NULL,
578  PCMK_ACTION_DEMOTE, FALSE);
579  }
580 
581  for (iter = then_actions; iter != NULL; iter = iter->next) {
582  pcmk_action_t *then = (pcmk_action_t *) iter->data;
583 
584  // Skip pseudo-actions (for example, those implied by fencing)
585  if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
586  order_actions(probe, then, pcmk__ar_ordered);
587  }
588  }
589  g_list_free(then_actions);
590  }
591 
592  /* Detect whether "then" is an interleaved clone action. For these, we want
593  * to add orderings only for the relevant instance.
594  */
595  if ((after->rsc != NULL)
596  && (after->rsc->variant > pcmk_rsc_variant_group)) {
 // NOTE(review): the key argument of this lookup (source line 598) is
 // missing from this listing
597  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
599 
600  interleave = crm_is_true(interleave_s);
601  if (interleave) {
 // NOTE(review): one argument of this call (source line 604) is missing
 // from this listing
602  compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
603  after->rsc,
605  false);
606  }
607  }
608 
609  /* Now recursively do the same for all actions ordered after "then". This
610  * also handles collective resources since the collective action will be
611  * ordered before its individual instances' actions.
612  */
613  for (iter = after->actions_after; iter != NULL; iter = iter->next) {
614  pcmk__related_action_t *after_wrapper = iter->data;
615 
616  /* pcmk__ar_first_implies_then is the reason why a required A.start
617  * implies/enforces B.start to be required too, which is the cause of
618  * B.restart/re-promote.
619  *
620  * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
621  * only used for unfencing case, which tends to introduce transition
622  * loops...
623  */
624  if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
625  /* The order type between a group/clone and its child such as
626  * B.start-> B_child.start is:
627  * pcmk__ar_then_implies_first_graphed
628  * |pcmk__ar_unrunnable_first_blocks
629  *
630  * Proceed through the ordering chain and build dependencies with
631  * its children.
632  */
633  if ((after->rsc == NULL)
634  || (after->rsc->variant < pcmk_rsc_variant_group)
635  || (probe->rsc->parent == after->rsc)
636  || (after_wrapper->action->rsc == NULL)
637  || (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
638  || (after->rsc != after_wrapper->action->rsc->parent)) {
639  continue;
640  }
641 
642  /* Proceed to the children of a group or a non-interleaved clone.
643  * For an interleaved clone, proceed only to the relevant child.
644  */
645  if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
646  && ((compatible_rsc == NULL)
647  || (compatible_rsc != after_wrapper->action->rsc))) {
648  continue;
649  }
650  }
651 
652  crm_trace("Recursively adding probe restart orderings for "
653  "'%s@%s then %s@%s' (type=%#.6x)",
654  after->uuid, pcmk__node_name(after->node),
655  after_wrapper->action->uuid,
656  pcmk__node_name(after_wrapper->action->node),
657  after_wrapper->type);
658 
659  add_restart_orderings_for_probe(probe, after_wrapper->action);
660  }
661 }
662 
/*!
 * \internal
 * \brief Visit every action in the scheduler's action list
 *
 * \param scheduler  Scheduler data whose actions list is walked
 *
 * NOTE(review): the loop body (source line 675) is missing from this listing.
 * Judging by the function name it presumably clears a tracking flag on each
 * action -- confirm against the original file.
 */
669 static void
670 clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
671 {
672  for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
673  pcmk_action_t *action = iter->data;
674 
676  }
677 }
678 
686 static void
687 add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
688 {
689  pcmk_resource_t *rsc = data;
690  GList *probes = NULL;
691 
692  // For collective resources, order each instance recursively
693  if (!pcmk__is_primitive(rsc)) {
694  g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
695  NULL);
696  return;
697  }
698 
699  // Find all probes for given resource
700  probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
701 
702  // Add probe restart orderings for each probe found
703  for (GList *iter = probes; iter != NULL; iter = iter->next) {
704  pcmk_action_t *probe = (pcmk_action_t *) iter->data;
705 
706  for (GList *then_iter = probe->actions_after; then_iter != NULL;
707  then_iter = then_iter->next) {
708 
709  pcmk__related_action_t *then = then_iter->data;
710 
711  add_start_orderings_for_probe(probe, then);
712  add_restart_orderings_for_probe(probe, then->action);
713  clear_actions_tracking_flag(rsc->cluster);
714  }
715  }
716 
717  g_list_free(probes);
718 }
719 
/*!
 * \internal
 * \brief Placeholder for ordering probes after the start of a prerequisite
 *
 * The entire body is compiled out with "#if 0", so this function currently
 * does nothing. The long comment inside explains why the logic stays
 * disabled.
 *
 * \param scheduler  Scheduler data (used only by the disabled code)
 */
728 static void
729 order_then_probes(pcmk_scheduler_t *scheduler)
730 {
731 #if 0
732  /* Given an ordering "A then B", we would prefer to wait for A to be started
733  * before probing B.
734  *
735  * For example, if A is a filesystem which B can't even run without, it
736  * would be helpful if the author of B's agent could assume that A is
737  * running before B.monitor will be called.
738  *
739  * However, we can't _only_ probe after A is running, otherwise we wouldn't
740  * detect the state of B if A could not be started. We can't even do an
741  * opportunistic version of this, because B may be moving:
742  *
743  * A.stop -> A.start -> B.probe -> B.stop -> B.start
744  *
745  * and if we add B.stop -> A.stop here, we get a loop:
746  *
747  * A.stop -> A.start -> B.probe -> B.stop -> A.stop
748  *
749  * We could kill the "B.probe -> B.stop" dependency, but that could mean
750  * stopping B "too" soon, because B.start must wait for the probe, and
751  * we don't want to stop B if we can't start it.
752  *
753  * We could add the ordering only if A is an anonymous clone with
754  * clone-max == node-max (since we'll never be moving it). However, we could
755  * still be stopping one instance at the same time as starting another.
756  *
757  * The complexity of checking for allowed conditions combined with the ever
758  * narrowing use case suggests that this code should remain disabled until
759  * someone gets smarter.
760  */
761  for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
762  pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
763 
764  pcmk_action_t *start = NULL;
765  GList *actions = NULL;
766  GList *probes = NULL;
767 
768  actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
769 
770  if (actions) {
771  start = actions->data;
772  g_list_free(actions);
773  }
774 
775  if (start == NULL) {
776  crm_debug("No start action for %s", rsc->id);
777  continue;
778  }
779 
780  probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
781 
782  for (actions = start->actions_before; actions != NULL;
783  actions = actions->next) {
784 
785  pcmk__related_action_t *before = actions->data;
786 
787  pcmk_action_t *first = before->action;
788  pcmk_resource_t *first_rsc = first->rsc;
789 
790  if (first->required_runnable_before) {
791  for (GList *clone_actions = first->actions_before;
792  clone_actions != NULL;
793  clone_actions = clone_actions->next) {
794 
795  before = clone_actions->data;
796 
797  crm_trace("Testing '%s then %s' for %s",
798  first->uuid, before->action->uuid, start->uuid);
799 
800  CRM_ASSERT(before->action->rsc != NULL);
801  first_rsc = before->action->rsc;
802  break;
803  }
804 
805  } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
806  pcmk__str_none)) {
807  crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
808  }
809 
810  if (first_rsc == NULL) {
811  continue;
812 
813  } else if (pe__const_top_resource(first_rsc, false)
814  == pe__const_top_resource(start->rsc, false)) {
815  crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
816  continue;
817 
818  } else if (!pcmk__is_clone(pe__const_top_resource(first_rsc,
819  false))) {
820  crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
821  continue;
822  }
823 
824  crm_debug("Applying %s before %s %d", first->uuid, start->uuid,
825  pe__const_top_resource(first_rsc, false)->variant);
826 
827  for (GList *probe_iter = probes; probe_iter != NULL;
828  probe_iter = probe_iter->next) {
829 
830  pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
831 
832  crm_debug("Ordering %s before %s", first->uuid, probe->uuid);
833  order_actions(first, probe, pcmk__ar_ordered);
834  }
835  }
836  }
837 #endif
838 }
839 
/*!
 * \brief Add the orderings that involve probes
 *
 * Applies add_start_restart_orderings_for_rsc() to every resource in
 * scheduler->resources, then calls add_probe_orderings_for_stops() and
 * order_then_probes().
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (source line 841) is missing from this listing. The body uses a variable
 * named "scheduler", which presumably is the only parameter -- confirm
 * against the original file.
 */
840 void
842 {
843  // Add orderings for "probe then X"
844  g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
845  NULL);
846  add_probe_orderings_for_stops(scheduler);
847 
848  order_then_probes(scheduler);
849 }
850 
/*!
 * \brief Schedule any needed probes on each node in the cluster
 *
 * Nodes that are offline, unclean, or have resource discovery disabled are
 * skipped. An offline node for which pcmk__is_failed_remote_node() returns
 * true is additionally passed to pe_fence_node().
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (source line 860) is missing from this listing, as are source lines 888,
 * 893, 896 and 901. The body uses a variable named "scheduler", which
 * presumably is the only parameter -- confirm against the original file.
 */
859 void
861 {
862  // Schedule probes on each node in the cluster as needed
863  for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
864  pcmk_node_t *node = (pcmk_node_t *) iter->data;
865  const char *probed = NULL;
866 
867  if (!node->details->online) { // Don't probe offline nodes
868  if (pcmk__is_failed_remote_node(node)) {
869  pe_fence_node(scheduler, node,
870  "the connection is unrecoverable", FALSE);
871  }
872  continue;
873 
874  } else if (node->details->unclean) { // ... or nodes that need fencing
875  continue;
876 
877  } else if (!node->details->rsc_discovery_enabled) {
878  // The user requested that probes not be done on this node
879  continue;
880  }
881 
882  /* This is no longer needed for live clusters, since the probe_complete
883  * node attribute will never be in the CIB. However this is still useful
884  * for processing old saved CIBs (< 1.1.14), including the
885  * reprobe-target_rc regression test.
886  */
 // NOTE(review): the last argument of this call (source line 888) is missing
 // from this listing
887  probed = pcmk__node_attr(node, CRM_OP_PROBED, NULL,
 // Taken only when the attribute is set and crm_is_true() rejects its value
889  if (probed != NULL && crm_is_true(probed) == FALSE) {
890  pcmk_action_t *probe_op = NULL;
891 
 // NOTE(review): the start of the key argument (source line 893) and the
 // statement after this call (source line 896) are missing from this listing
892  probe_op = custom_action(NULL,
894  node->details->uname),
895  CRM_OP_REPROBE, node, FALSE, scheduler);
897  continue;
898  }
899 
900  // Probe each resource in the cluster on this node, as needed
 // NOTE(review): the statement doing this (source line 901) is missing from
 // this listing
902  }
903 }
pcmk_assignment_methods_t * cmds
Definition: resources.h:413
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:1032
pcmk_resource_t * pe__resource_contains_guest_node(const pcmk_scheduler_t *scheduler, const pcmk_resource_t *rsc)
Definition: remote.c:29
Relation applies only if actions are on same node.
A dumping ground.
GHashTable * known_on
Definition: resources.h:459
'then' is runnable (and migratable) only if 'first' is runnable
pcmk_scheduler_t * cluster
Definition: resources.h:408
Actions are ordered if on same node (or migration target for migrate_to)
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition: roles.c:23
char data[0]
Definition: cpg.c:58
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:1007
Stopped.
Definition: roles.h:36
pcmk_resource_t rsc2
pcmk_action_t * action
Definition: actions.h:322
enum rsc_role_e role
Definition: resources.h:464
#define CRM_OP_REPROBE
Definition: crm.h:136
GList * children
Definition: resources.h:471
#define pcmk__rsc_trace(rsc, fmt, args...)
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition: utils.c:457
#define PCMK_ACTION_ON
Definition: actions.h:64
Service active and promoted.
Definition: results.h:265
enum rsc_role_e next_role
Definition: resources.h:465
gboolean exclusive_discover
Definition: resources.h:432
#define reload_key(rsc)
Definition: internal.h:218
#define pcmk__insert_meta(obj, name, value)
#define PCMK_ACTION_MONITOR
Definition: actions.h:60
GHashTable * meta
Definition: resources.h:467
bool pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
G_GNUC_INTERNAL pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current)
#define PCMK_META_INTERLEAVE
Definition: options.h:90
#define PCMK_ACTION_DO_SHUTDOWN
Definition: actions.h:51
Promoted.
Definition: roles.h:39
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:112
#define PCMK__META_STONITH_ACTION
GList * actions
Definition: scheduler.h:239
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1551
pcmk_resource_t * container
Definition: resources.h:476
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:608
#define pcmk__set_relation_flags(ar_flags, flags_to_set)
gboolean remote_requires_reset
Definition: nodes.h:112
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: scheduler.h:231
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
pcmk_resource_t * parent
Definition: resources.h:409
pcmk_node_t * node
Definition: actions.h:341
#define crm_debug(fmt, args...)
Definition: logging.h:402
Actions are ordered (optionally, if no other flags are set)
#define pcmk__clear_action_flags(action, flags_to_clear)
pcmk_node_t * pcmk__find_node_in_list(const GList *nodes, const char *node_name)
Definition: nodes.c:150
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:446
pcmk_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition: status.c:487
char * task
Definition: actions.h:343
GList * actions_after
Definition: actions.h:371
#define crm_trace(fmt, args...)
Definition: logging.h:404
#define PCMK_RESOURCE_CLASS_STONITH
Definition: agents.h:31
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
GHashTable * meta
Definition: actions.h:354
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:98
struct pe_node_shared_s * details
Definition: nodes.h:167
#define PCMK_ACTION_START
Definition: actions.h:72
unsigned long long flags
Definition: resources.h:428
const char * uname
Definition: nodes.h:73
Ordering applies only if 'first' is required and on same node as 'then'.
#define PCMK_ACTION_STOP
Definition: actions.h:75
GList * actions
Definition: resources.h:444
#define PCMK_ACTION_STONITH
Definition: actions.h:74
#define PCMK_VALUE_TRUE
Definition: options.h:215
char * uuid
Definition: actions.h:344
void pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:196
void pcmk__order_probes(pcmk_scheduler_t *scheduler)
enum pe_obj_types variant
Definition: resources.h:410
bool(* create_probe)(pcmk_resource_t *rsc, pcmk_node_t *node)
int rsc_discover_mode
Definition: nodes.h:170
pcmk_resource_t rsc1
Service safely stopped.
Definition: results.h:264
const char * id
Definition: nodes.h:72
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
Definition: pe_actions.c:1129
void pe__add_action_expected_result(pcmk_action_t *action, int expected_result)
Definition: pe_actions.c:1828
gboolean rsc_discovery_enabled
Definition: nodes.h:106
#define PCMK_XA_CLASS
Definition: xml_names.h:241
Cluster status and scheduling.
gboolean is_remote_node
Definition: resources.h:431
GList * ordering_constraints
Definition: scheduler.h:233
pcmk_scheduler_t * scheduler
GList * find_actions(GList *input, const char *key, const pcmk_node_t *on_node)
Definition: pe_actions.c:1478
#define CRM_ASSERT(expr)
Definition: results.h:42
Relation applies only if 'first' cannot be part of a live migration.
#define PCMK__META_OP_NO_WAIT
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:118
bool pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
pcmk_node_t * allocated_to
Definition: resources.h:447
#define PCMK_ACTION_PROMOTE
Definition: actions.h:66
GList * running_on
Definition: resources.h:456
enum pe_action_flags flags
Definition: actions.h:349
#define CRM_OP_PROBED
Definition: crm.h:135
#define pcmk__set_action_flags(action, flags_to_set)
gboolean crm_is_true(const char *s)
Definition: strings.c:488
pcmk_resource_t * rsc
Definition: actions.h:340
Resource role is unknown.
Definition: roles.h:35
unsigned long long flags
Definition: scheduler.h:211
enum pe_ordering type
Definition: actions.h:317
gboolean unclean
Definition: nodes.h:91
gboolean online
Definition: nodes.h:80
uint64_t flags
Definition: remote.c:215
GList * actions_before
Definition: actions.h:370
int required_runnable_before
Definition: actions.h:367
G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pcmk_node_t *node)
pcmk_resource_t * remote_rsc
Definition: nodes.h:135
No relation (compare with equality rather than bit set)
GHashTable * allowed_nodes
Definition: resources.h:462
Where resource is running.