pacemaker  2.1.9-49aab99839
Scalable High-Availability cluster resource manager
pcmk_sched_probes.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
/*!
 * \internal
 * \brief Attach to a probe the result the scheduler expects it to return
 *
 * The expectation is derived from what is currently believed about the
 * resource: whether it is listed as running on \p node, and if so whether
 * its role is promoted.
 *
 * \param[in,out] probe  Probe action to annotate
 * \param[in]     rsc    Resource that the probe is for
 * \param[in]     node   Node that the probe will run on
 *
 * \note NOTE(review): the statements inside both branches (listing lines 36
 *       and 39) are absent from this extracted listing. Presumably they call
 *       pe__add_action_expected_result() with the matching result code --
 *       confirm against the real source file.
 */
27 static void
28 add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
29  const pcmk_node_t *node)
30 {
31  // Check whether resource is currently active on node
32  pcmk_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
33 
34  // The expected result is what we think the resource's current state is
35  if (running == NULL) {
37 
38  } else if (rsc->role == pcmk_role_promoted) {
40  }
41 }
42 
52 bool
54 {
55  bool any_created = false;
56 
57  for (GList *iter = rscs; iter != NULL; iter = iter->next) {
58  pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
59 
60  if (rsc->cmds->create_probe(rsc, node)) {
61  any_created = true;
62  }
63  }
64  return any_created;
65 }
66 
/*!
 * \internal
 * \brief Relate the probe of one resource to another resource
 *
 * Acts only when \p rsc1 has an assigned node and \p rsc1 has no entry for
 * that node in its known_on table (that is, its state there is not yet
 * known). Otherwise this function does nothing.
 *
 * \param[in,out] rsc1  Resource whose assigned node and known_on are checked
 * \param[in,out] rsc2  Second resource (not referenced in the visible lines)
 *
 * \note NOTE(review): most of the call made inside the condition (listing
 *       lines 81, 82, 84 and 86) is absent from this extracted listing.
 *       Judging by the function name it presumably orders the probe of
 *       \p rsc1 before the start of \p rsc2 -- confirm against the real
 *       source file.
 */
74 static void
75 probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
76 {
77  if ((rsc1->allocated_to != NULL)
78  && (g_hash_table_lookup(rsc1->known_on,
79  rsc1->allocated_to->details->id) == NULL)) {
80 
83  NULL,
85  NULL,
87  }
88 }
89 
/*!
 * \internal
 * \brief Estimate whether a guest node's container resource will be stopped
 *
 * The container is taken from node->details->remote_rsc->container, which is
 * dereferenced unconditionally, so the caller must pass a node whose
 * remote_rsc and container are both set.
 *
 * \param[in] node  Guest node to check
 *
 * \return true if any of the following holds, otherwise false:
 *         - the node requires a remote reset
 *         - the node is unclean
 *         - the container resource has the failed flag set
 *         - the container's next role is stopped
 *         - the container's role is above stopped, it has an assigned node,
 *           and that node is not in its running_on list (it is moving)
 */
98 static bool
99 guest_resource_will_stop(const pcmk_node_t *node)
100 {
101  const pcmk_resource_t *guest_rsc = node->details->remote_rsc->container;
102 
103  /* Ideally, we'd check whether the guest has a required stop, but that
104  * information doesn't exist yet, so approximate it ...
105  */
106  return node->details->remote_requires_reset
107  || node->details->unclean
108  || pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
109  || (guest_rsc->next_role == pcmk_role_stopped)
110 
111  // Guest is moving
112  || ((guest_rsc->role > pcmk_role_stopped)
113  && (guest_rsc->allocated_to != NULL)
114  && (pcmk__find_node_in_list(guest_rsc->running_on,
115  guest_rsc->allocated_to->details->uname) == NULL));
116 }
117 
/*!
 * \internal
 * \brief Create a probe action for a resource on a node
 *
 * The probe is a monitor action whose operation key is built with an
 * interval of 0. After creation, the probe is ordered relative to unfencing
 * via pcmk__order_vs_unfence() and given its expected result via
 * add_expected_result().
 *
 * \param[in,out] rsc   Resource to probe
 * \param[in,out] node  Node to probe the resource on
 *
 * \return The action returned by custom_action() for the probe
 *
 * \note NOTE(review): listing line 138 (between the custom_action() call and
 *       the unfence ordering) is absent from this extracted listing --
 *       confirm its content against the real source file.
 */
127 static pcmk_action_t *
128 probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
129 {
130  pcmk_action_t *probe = NULL;
131  char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
132 
133  crm_debug("Scheduling probe of %s %s on %s",
134  pcmk_role_text(rsc->role), rsc->id, pcmk__node_name(node));
135 
    // The probe is created as a non-optional action (optional == FALSE)
136  probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
137  rsc->cluster);
139 
140  pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
141  add_expected_result(probe, rsc, node);
142  return probe;
143 }
144 
156 bool
158 {
159  uint32_t flags = pcmk__ar_ordered;
160  pcmk_action_t *probe = NULL;
161  pcmk_node_t *allowed = NULL;
162  pcmk_resource_t *top = uber_parent(rsc);
163  const char *reason = NULL;
164 
165  pcmk__assert((rsc != NULL) && (node != NULL));
166 
168  reason = "start-up probes are disabled";
169  goto no_probe;
170  }
171 
172  if (pcmk__is_pacemaker_remote_node(node)) {
174  reason = "Pacemaker Remote nodes cannot run stonith agents";
175  goto no_probe;
176 
177  } else if (pcmk__is_guest_or_bundle_node(node)
179  reason = "guest nodes cannot run resources containing guest nodes";
180  goto no_probe;
181 
182  } else if (rsc->is_remote_node) {
183  reason = "Pacemaker Remote nodes cannot host remote connections";
184  goto no_probe;
185  }
186  }
187 
188  // If this is a collective resource, probes are created for its children
189  if (rsc->children != NULL) {
190  return pcmk__probe_resource_list(rsc->children, node);
191  }
192 
193  if ((rsc->container != NULL) && !rsc->is_remote_node) {
194  reason = "resource is inside a container";
195  goto no_probe;
196 
197  } else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
198  reason = "resource is orphaned";
199  goto no_probe;
200 
201  } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
202  reason = "resource state is already known";
203  goto no_probe;
204  }
205 
206  allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
207 
208  if (rsc->exclusive_discover || top->exclusive_discover) {
209  // Exclusive discovery is enabled ...
210 
211  if (allowed == NULL) {
212  // ... but this node is not allowed to run the resource
213  reason = "resource has exclusive discovery but is not allowed "
214  "on node";
215  goto no_probe;
216 
217  } else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
218  // ... but no constraint marks this node for discovery of resource
219  reason = "resource has exclusive discovery but is not enabled "
220  "on node";
221  goto no_probe;
222  }
223  }
224 
225  if (allowed == NULL) {
226  allowed = node;
227  }
228  if (allowed->rsc_discover_mode == pcmk_probe_never) {
229  reason = "node has discovery disabled";
230  goto no_probe;
231  }
232 
233  if (pcmk__is_guest_or_bundle_node(node)) {
234  pcmk_resource_t *guest = node->details->remote_rsc->container;
235 
236  if (guest->role == pcmk_role_stopped) {
237  // The guest is stopped, so we know no resource is active there
238  reason = "node's guest is stopped";
239  probe_then_start(guest, top);
240  goto no_probe;
241 
242  } else if (guest_resource_will_stop(node)) {
243  reason = "node's guest will stop";
244 
245  // Order resource start after guest stop (in case it's restarting)
246  pcmk__new_ordering(guest,
247  pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
248  NULL, top,
250  NULL, pcmk__ar_ordered, rsc->cluster);
251  goto no_probe;
252  }
253  }
254 
255  // We've eliminated all cases where a probe is not needed, so now it is
256  probe = probe_action(rsc, node);
257 
258  /* Below, we will order the probe relative to start or reload. If this is a
259  * clone instance, the start or reload is for the entire clone rather than
260  * just the instance. Otherwise, the start or reload is for the resource
261  * itself.
262  */
263  if (!pcmk__is_clone(top)) {
264  top = rsc;
265  }
266 
267  /* Prevent a start if the resource can't be probed, but don't cause the
268  * resource or entire clone to stop if already active.
269  */
271  && (top->running_on == NULL)) {
273  }
274 
275  // Start or reload after probing the resource
276  pcmk__new_ordering(rsc, NULL, probe,
277  top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
278  flags, rsc->cluster);
279  pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
280  pcmk__ar_ordered, rsc->cluster);
281 
282  return true;
283 
284 no_probe:
285  pcmk__rsc_trace(rsc,
286  "Skipping probe for %s on %s because %s",
287  rsc->id, node->details->id, reason);
288  return false;
289 }
290 
/*!
 * \internal
 * \brief Decide whether a probe must be ordered before another action
 *
 * \param[in] probe  Probe action being considered
 * \param[in] then   Action that might need to wait for the probe
 *
 * \return false if \p then is a fencing action on the probe's own node whose
 *         looked-up operation is "on" (unfencing), or if \p then is a
 *         shutdown action on a different node than the probe (both nodes
 *         being non-NULL); otherwise true
 *
 * \note NOTE(review): the key passed to g_hash_table_lookup() (listing line
 *       308) is absent from this extracted listing; presumably it is the
 *       fencing-action meta-attribute name -- confirm against the real
 *       source file.
 */
300 static bool
301 probe_needed_before_action(const pcmk_action_t *probe,
302  const pcmk_action_t *then)
303 {
304  // Probes on a node are performed after unfencing it, not before
305  if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
306  && pcmk__same_node(probe->node, then->node)) {
307  const char *op = g_hash_table_lookup(then->meta,
309 
310  if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
311  return false;
312  }
313  }
314 
315  // Probes should be done on a node before shutting it down
    // (so a shutdown of some *other* node does not have to wait for the probe)
316  if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
317  && (probe->node != NULL) && (then->node != NULL)
318  && !pcmk__same_node(probe->node, then->node)) {
319  return false;
320  }
321 
322  // Otherwise probes should always be done before any other action
323  return true;
324 }
325 
/*!
 * \internal
 * \brief Imply "probe then X" orderings from "stop then X" orderings
 *
 * Walks every entry of scheduler->ordering_constraints. For an enabled
 * ordering between two different resources whose first action is a stop, each
 * scheduled probe (monitor action) of the first resource is ordered before
 * each matching "then" action, provided probe_needed_before_action() agrees.
 *
 * Orderings are skipped when they are disabled, have no first resource, have
 * the same resource on both sides, lack both an action and a task name on
 * either side, have a first action other than stop, or order a resource's
 * stop before the stop of its own container.
 *
 * \param[in,out] scheduler  Scheduler data
 *
 * \note NOTE(review): several lines of the "preserve certain order options /
 *       types" section (listing lines 397, 399, 401, 402 and 406) are absent
 *       from this extracted listing, so the exact conditions under which
 *       order_flags is adjusted cannot be read here -- confirm against the
 *       real source file.
 */
339 static void
340 add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
341 {
342  for (GList *iter = scheduler->ordering_constraints; iter != NULL;
343  iter = iter->next) {
344 
345  pcmk__action_relation_t *order = iter->data;
346  uint32_t order_flags = pcmk__ar_ordered;
347  GList *probes = NULL;
348  GList *then_actions = NULL;
349  pcmk_action_t *first = NULL;
350  pcmk_action_t *then = NULL;
351 
352  // Skip disabled orderings
353  if (order->flags == pcmk__ar_none) {
354  continue;
355  }
356 
357  // Skip non-resource orderings, and orderings for the same resource
358  if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) {
359  continue;
360  }
361 
362  // Skip invalid orderings (shouldn't be possible)
363  first = order->action1;
364  then = order->action2;
365  if (((first == NULL) && (order->task1 == NULL))
366  || ((then == NULL) && (order->task2 == NULL))) {
367  continue;
368  }
369 
370  // Skip orderings for first actions other than stop
    // (checked by task name if the action exists, else by operation key suffix)
371  if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
372  pcmk__str_none)) {
373  continue;
374  } else if ((first == NULL)
375  && !pcmk__ends_with(order->task1,
376  "_" PCMK_ACTION_STOP "_0")) {
377  continue;
378  }
379 
380  /* Do not imply a probe ordering for a resource inside of a stopping
381  * container. Otherwise, it might introduce a transition loop, since a
382  * probe could be scheduled after the container starts again.
383  */
384  if ((order->rsc2 != NULL) && (order->rsc1->container == order->rsc2)) {
385 
386  if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
387  pcmk__str_none)) {
388  continue;
389  } else if ((then == NULL)
390  && pcmk__ends_with(order->task2,
391  "_" PCMK_ACTION_STOP "_0")) {
392  continue;
393  }
394  }
395 
396  // Preserve certain order options for future filtering
398  pcmk__set_relation_flags(order_flags,
400  }
403  }
404 
405  // Preserve certain order types for future filtering
407  || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
408  order_flags = order->flags;
409  }
410 
411  // List all scheduled probes for the first resource
412  probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR,
413  FALSE);
414  if (probes == NULL) { // There aren't any
415  continue;
416  }
417 
418  // List all relevant "then" actions
    // (if neither branch applies, then_actions stays NULL and nothing is ordered)
419  if (then != NULL) {
420  then_actions = g_list_prepend(NULL, then);
421 
422  } else if (order->rsc2 != NULL) {
423  then_actions = find_actions(order->rsc2->actions, order->task2,
424  NULL);
425  if (then_actions == NULL) { // There aren't any
426  g_list_free(probes);
427  continue;
428  }
429  }
430 
431  crm_trace("Implying 'probe then' orderings for '%s then %s' "
432  "(id=%d, type=%.6x)",
433  ((first == NULL)? order->task1 : first->uuid),
434  ((then == NULL)? order->task2 : then->uuid),
435  order->id, order->flags);
436 
    // Order every probe before every "then" action that needs it
437  for (GList *probe_iter = probes; probe_iter != NULL;
438  probe_iter = probe_iter->next) {
439 
440  pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
441 
442  for (GList *then_iter = then_actions; then_iter != NULL;
443  then_iter = then_iter->next) {
444 
            // NOTE: this declaration shadows the outer "then" variable
445  pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
446 
447  if (probe_needed_before_action(probe, then)) {
448  order_actions(probe, then, order_flags);
449  }
450  }
451  }
452 
    // Only the list cells are freed here, not the actions they point to
453  g_list_free(then_actions);
454  g_list_free(probes);
455  }
456 }
457 
/*!
 * \internal
 * \brief Order a probe before the starts of inactive instances of its parent
 *
 * Does nothing unless \p after wraps a start action whose resource has a
 * variant above group and is the top-level resource of the probed resource
 * (further conditions sit on lines missing from this listing, see the note).
 * Otherwise, every action ordered after that start is examined. Each one
 * that is a start of a not-currently-running resource under the same
 * top-level resource gets ordered after the probe using \c flags.
 *
 * \param[in,out] probe  Probe action to order
 * \param[in,out] after  Wrapper of an action already ordered after \p probe
 *
 * \note NOTE(review): listing line 472 (which must declare the local
 *       \c flags used below) and listing lines 485 and 487 (parts of the
 *       early-return condition) are absent from this extracted listing --
 *       confirm against the real source file.
 */
468 static void
469 add_start_orderings_for_probe(pcmk_action_t *probe,
470  pcmk__related_action_t *after)
471 {
473 
474  /* Although the ordering between the probe of the clone instance and the
475  * start of its parent has been added in pcmk__probe_rsc_on_node(), we
476  * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
477  * as long as any of the clone instances are running to prevent them from
478  * being unexpectedly stopped.
479  *
480  * On the other hand, we still need to prevent any inactive instances from
481  * starting unless the probe is runnable so that we don't risk starting too
482  * many instances before we know the state on all nodes.
483  */
484  if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
486  // The order type is already enforced for its parent.
488  || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
489  || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
490  pcmk__str_none)) {
491  return;
492  }
493 
494  crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
495  "then instances of %s@%s'",
496  probe->uuid, pcmk__node_name(probe->node),
497  after->action->uuid, pcmk__node_name(after->action->node));
498 
499  for (GList *then_iter = after->action->actions_after; then_iter != NULL;
500  then_iter = then_iter->next) {
501 
502  pcmk__related_action_t *then = then_iter->data;
503 
    // Only starts of inactive resources under the same top-level resource
504  if (then->action->rsc->running_on
505  || (pe__const_top_resource(then->action->rsc, false)
506  != after->action->rsc)
507  || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
508  pcmk__str_none)) {
509  continue;
510  }
511 
512  crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
513  "then %s@%s' (type=%#.6x)",
514  probe->uuid, pcmk__node_name(probe->node),
515  then->action->uuid, pcmk__node_name(then->action->node),
516  flags);
517 
518  /* Prevent the instance from starting if the instance can't, but don't
519  * cause any other instances to stop if already active.
520  */
521  order_actions(probe, then->action, flags);
522  }
523 
524  return;
525 }
526 
/*!
 * \internal
 * \brief Order a probe before the stop/demote half of any restart it can cause
 *
 * Given a probe of a primitive and an action \p after that is (directly or
 * transitively) ordered after it:
 * - if \p after is a start of a different primitive, the probe is ordered
 *   before that primitive's non-pseudo stop actions
 * - if \p after is a promote of a different primitive, the probe is ordered
 *   before that primitive's non-pseudo demote actions
 * - the function then recurses into the actions ordered after \p after,
 *   filtered as described in the loop below
 *
 * Returns immediately if either argument is NULL, if the probe's resource is
 * not a primitive, or if the probe's task is not a monitor.
 *
 * \param[in,out] probe  Probe action
 * \param[in,out] after  Action ordered after \p probe
 *
 * \note NOTE(review): the loop guard (listing lines 553 and 556) is absent
 *       from this extracted listing. Presumably it tests and then sets a
 *       per-action tracking flag on \p after so that each action is visited
 *       once per traversal -- confirm against the real source file. The
 *       second arguments of the g_hash_table_lookup() call (listing line
 *       596) and of pcmk__find_compatible_instance() (listing line 602) are
 *       also missing.
 */
539 static void
540 add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
541 {
542  GList *iter = NULL;
543  bool interleave = false;
544  pcmk_resource_t *compatible_rsc = NULL;
545 
546  // Validate that this is a resource probe followed by some action
547  if ((after == NULL) || (probe == NULL) || !pcmk__is_primitive(probe->rsc)
548  || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
549  return;
550  }
551 
552  // Avoid running into any possible loop
554  return;
555  }
557 
558  crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
559  probe->uuid, pcmk__node_name(probe->node),
560  after->uuid, pcmk__node_name(after->node));
561 
562  /* Add restart orderings if "then" is for a different primitive.
563  * Orderings for collective resources will be added later.
564  */
565  if (pcmk__is_primitive(after->rsc) && (probe->rsc != after->rsc)) {
566 
567  GList *then_actions = NULL;
568 
    // A start pairs with the resource's stops, a promote with its demotes
569  if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
570  then_actions = pe__resource_actions(after->rsc, NULL,
571  PCMK_ACTION_STOP, FALSE);
572 
573  } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
574  pcmk__str_none)) {
575  then_actions = pe__resource_actions(after->rsc, NULL,
576  PCMK_ACTION_DEMOTE, FALSE);
577  }
578 
579  for (iter = then_actions; iter != NULL; iter = iter->next) {
580  pcmk_action_t *then = (pcmk_action_t *) iter->data;
581 
582  // Skip pseudo-actions (for example, those implied by fencing)
583  if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
584  order_actions(probe, then, pcmk__ar_ordered);
585  }
586  }
587  g_list_free(then_actions);
588  }
589 
590  /* Detect whether "then" is an interleaved clone action. For these, we want
591  * to add orderings only for the relevant instance.
592  */
593  if ((after->rsc != NULL)
594  && (after->rsc->variant > pcmk_rsc_variant_group)) {
595  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
597 
598  interleave = crm_is_true(interleave_s);
599  if (interleave) {
600  compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
601  after->rsc,
603  false);
604  }
605  }
606 
607  /* Now recursively do the same for all actions ordered after "then". This
608  * also handles collective resources since the collective action will be
609  * ordered before its individual instances' actions.
610  */
611  for (iter = after->actions_after; iter != NULL; iter = iter->next) {
612  pcmk__related_action_t *after_wrapper = iter->data;
613 
614  /* pcmk__ar_first_implies_then is the reason why a required A.start
615  * implies/enforces B.start to be required too, which is the cause of
616  * B.restart/re-promote.
617  *
618  * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
619  * only used for unfencing case, which tends to introduce transition
620  * loops...
621  */
622  if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
623  /* The order type between a group/clone and its child such as
624  * B.start-> B_child.start is:
625  * pcmk__ar_then_implies_first_graphed
626  * |pcmk__ar_unrunnable_first_blocks
627  *
628  * Proceed through the ordering chain and build dependencies with
629  * its children.
630  */
631  if ((after->rsc == NULL)
632  || (after->rsc->variant < pcmk_rsc_variant_group)
633  || (probe->rsc->parent == after->rsc)
634  || (after_wrapper->action->rsc == NULL)
635  || (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
636  || (after->rsc != after_wrapper->action->rsc->parent)) {
637  continue;
638  }
639 
640  /* Proceed to the children of a group or a non-interleaved clone.
641  * For an interleaved clone, proceed only to the relevant child.
642  */
643  if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
644  && ((compatible_rsc == NULL)
645  || (compatible_rsc != after_wrapper->action->rsc))) {
646  continue;
647  }
648  }
649 
650  crm_trace("Recursively adding probe restart orderings for "
651  "'%s@%s then %s@%s' (type=%#.6x)",
652  after->uuid, pcmk__node_name(after->node),
653  after_wrapper->action->uuid,
654  pcmk__node_name(after_wrapper->action->node),
655  after_wrapper->type);
656 
657  add_restart_orderings_for_probe(probe, after_wrapper->action);
658  }
659 }
660 
/*!
 * \internal
 * \brief Reset per-action traversal state on every action in the scheduler
 *
 * Iterates over scheduler->actions and applies an operation to each action.
 *
 * \param[in,out] scheduler  Scheduler data
 *
 * \note NOTE(review): the loop body (listing line 673) is absent from this
 *       extracted listing. Judging by the function name it presumably clears
 *       the tracking flag used for loop detection on each action -- confirm
 *       against the real source file.
 */
667 static void
668 clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
669 {
670  for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
671  pcmk_action_t *action = iter->data;
672 
674  }
675 }
676 
/*!
 * \internal
 * \brief Add start and restart orderings for all probes of a resource
 *
 * For a non-primitive resource, this recurses into each child. For a
 * primitive, every probe (monitor action) of the resource is paired with each
 * action already ordered after it, and both
 * add_start_orderings_for_probe() and add_restart_orderings_for_probe() are
 * applied to the pair. The action tracking flags are cleared after each
 * pair.
 *
 * The signature matches what g_list_foreach() expects of its callback, and
 * this function is passed to g_list_foreach() for the recursion.
 *
 * \param[in,out] data       Resource to add orderings for
 * \param[in]     user_data  Ignored
 */
684 static void
685 add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
686 {
687  pcmk_resource_t *rsc = data;
688  GList *probes = NULL;
689 
690  // For collective resources, order each instance recursively
691  if (!pcmk__is_primitive(rsc)) {
692  g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
693  NULL);
694  return;
695  }
696 
697  // Find all probes for given resource
698  probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
699 
700  // Add probe restart orderings for each probe found
701  for (GList *iter = probes; iter != NULL; iter = iter->next) {
702  pcmk_action_t *probe = (pcmk_action_t *) iter->data;
703 
704  for (GList *then_iter = probe->actions_after; then_iter != NULL;
705  then_iter = then_iter->next) {
706 
707  pcmk__related_action_t *then = then_iter->data;
708 
709  add_start_orderings_for_probe(probe, then);
710  add_restart_orderings_for_probe(probe, then->action);
            // Reset tracking state so the next traversal starts clean
711  clear_actions_tracking_flag(rsc->cluster);
712  }
713  }
714 
    // Frees only the list cells; the probe actions themselves are untouched
715  g_list_free(probes);
716 }
717 
/*!
 * \internal
 * \brief Order probes after the start of resources they depend on (disabled)
 *
 * The entire body is compiled out with "#if 0", so this function currently
 * does nothing and \p scheduler is unused. The long comment inside explains
 * why the logic stays disabled. The retained code would, for each resource
 * with a start action, look at the actions ordered before that start and
 * order qualifying ones (belonging to a different, clone top-level resource)
 * before each of the resource's probes.
 *
 * \param[in,out] scheduler  Scheduler data
 */
726 static void
727 order_then_probes(pcmk_scheduler_t *scheduler)
728 {
729 #if 0
730  /* Given an ordering "A then B", we would prefer to wait for A to be started
731  * before probing B.
732  *
733  * For example, if A is a filesystem which B can't even run without, it
734  * would be helpful if the author of B's agent could assume that A is
735  * running before B.monitor will be called.
736  *
737  * However, we can't _only_ probe after A is running, otherwise we wouldn't
738  * detect the state of B if A could not be started. We can't even do an
739  * opportunistic version of this, because B may be moving:
740  *
741  * A.stop -> A.start -> B.probe -> B.stop -> B.start
742  *
743  * and if we add B.stop -> A.stop here, we get a loop:
744  *
745  * A.stop -> A.start -> B.probe -> B.stop -> A.stop
746  *
747  * We could kill the "B.probe -> B.stop" dependency, but that could mean
748  * stopping B "too" soon, because B.start must wait for the probe, and
749  * we don't want to stop B if we can't start it.
750  *
751  * We could add the ordering only if A is an anonymous clone with
752  * clone-max == node-max (since we'll never be moving it). However, we could
753  * still be stopping one instance at the same time as starting another.
754  *
755  * The complexity of checking for allowed conditions combined with the ever
756  * narrowing use case suggests that this code should remain disabled until
757  * someone gets smarter.
758  */
759  for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
760  pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
761 
762  pcmk_action_t *start = NULL;
763  GList *actions = NULL;
764  GList *probes = NULL;
765 
766  actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
767 
768  if (actions) {
769  start = actions->data;
770  g_list_free(actions);
771  }
772 
773  if (start == NULL) {
774  crm_debug("No start action for %s", rsc->id);
775  continue;
776  }
777 
778  probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
779 
780  for (actions = start->actions_before; actions != NULL;
781  actions = actions->next) {
782 
783  pcmk__related_action_t *before = actions->data;
784 
785  pcmk_action_t *first = before->action;
786  pcmk_resource_t *first_rsc = first->rsc;
787 
788  if (first->required_runnable_before) {
789  for (GList *clone_actions = first->actions_before;
790  clone_actions != NULL;
791  clone_actions = clone_actions->next) {
792 
793  before = clone_actions->data;
794 
795  crm_trace("Testing '%s then %s' for %s",
796  first->uuid, before->action->uuid, start->uuid);
797 
798  pcmk__assert(before->action->rsc != NULL);
799  first_rsc = before->action->rsc;
800  break;
801  }
802 
803  } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
804  pcmk__str_none)) {
805  crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
806  }
807 
808  if (first_rsc == NULL) {
809  continue;
810 
811  } else if (pe__const_top_resource(first_rsc, false)
812  == pe__const_top_resource(start->rsc, false)) {
813  crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
814  continue;
815 
816  } else if (!pcmk__is_clone(pe__const_top_resource(first_rsc,
817  false))) {
818  crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
819  continue;
820  }
821 
822  crm_debug("Applying %s before %s %d", first->uuid, start->uuid,
823  pe__const_top_resource(first_rsc, false)->variant);
824 
825  for (GList *probe_iter = probes; probe_iter != NULL;
826  probe_iter = probe_iter->next) {
827 
828  pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
829 
830  crm_debug("Ordering %s before %s", first->uuid, probe->uuid);
831  order_actions(first, probe, pcmk__ar_ordered);
832  }
833  }
834  }
835 #endif
836 }
837 
838 void
840 {
841  // Add orderings for "probe then X"
842  g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
843  NULL);
844  add_probe_orderings_for_stops(scheduler);
845 
846  order_then_probes(scheduler);
847 }
848 
857 void
859 {
860  // Schedule probes on each node in the cluster as needed
861  for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
862  pcmk_node_t *node = (pcmk_node_t *) iter->data;
863  const char *probed = NULL;
864 
865  if (!node->details->online) { // Don't probe offline nodes
866  if (pcmk__is_failed_remote_node(node)) {
867  pe_fence_node(scheduler, node,
868  "the connection is unrecoverable", FALSE);
869  }
870  continue;
871 
872  } else if (node->details->unclean) { // ... or nodes that need fencing
873  continue;
874 
875  } else if (!node->details->rsc_discovery_enabled) {
876  // The user requested that probes not be done on this node
877  continue;
878  }
879 
880  /* This is no longer needed for live clusters, since the probe_complete
881  * node attribute will never be in the CIB. However this is still useful
882  * for processing old saved CIBs (< 1.1.14), including the
883  * reprobe-target_rc regression test.
884  */
885  probed = pcmk__node_attr(node, CRM_OP_PROBED, NULL,
887  if (probed != NULL && crm_is_true(probed) == FALSE) {
888  pcmk_action_t *probe_op = NULL;
889 
890  probe_op = custom_action(NULL,
892  node->details->uname),
893  CRM_OP_REPROBE, node, FALSE, scheduler);
895  continue;
896  }
897 
898  // Probe each resource in the cluster on this node, as needed
900  }
901 }
pcmk_assignment_methods_t * cmds
Definition: resources.h:413
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:1071
pcmk_resource_t * pe__resource_contains_guest_node(const pcmk_scheduler_t *scheduler, const pcmk_resource_t *rsc)
Definition: remote.c:29
Relation applies only if actions are on same node.
A dumping ground.
GHashTable * known_on
Definition: resources.h:459
'then' is runnable (and migratable) only if 'first' is runnable
pcmk_scheduler_t * cluster
Definition: resources.h:408
Actions are ordered if on same node (or migration target for migrate_to)
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition: roles.c:23
char data[0]
Definition: cpg.c:58
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:1046
Stopped.
Definition: roles.h:36
pcmk_resource_t rsc2
pcmk_action_t * action
Definition: actions.h:322
enum rsc_role_e role
Definition: resources.h:464
#define CRM_OP_REPROBE
Definition: crm.h:136
GList * children
Definition: resources.h:471
#define pcmk__rsc_trace(rsc, fmt, args...)
#define PCMK_ACTION_ON
Definition: actions.h:64
Service active and promoted.
Definition: results.h:261
enum rsc_role_e next_role
Definition: resources.h:465
gboolean exclusive_discover
Definition: resources.h:432
#define reload_key(rsc)
Definition: internal.h:218
#define pcmk__insert_meta(obj, name, value)
#define PCMK_ACTION_MONITOR
Definition: actions.h:60
GHashTable * meta
Definition: resources.h:467
bool pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
G_GNUC_INTERNAL pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current)
#define PCMK_META_INTERLEAVE
Definition: options.h:90
#define PCMK_ACTION_DO_SHUTDOWN
Definition: actions.h:51
Promoted.
Definition: roles.h:39
void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:112
#define PCMK__META_STONITH_ACTION
GList * actions
Definition: scheduler.h:239
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1553
pcmk_resource_t * container
Definition: resources.h:476
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:620
#define pcmk__set_relation_flags(ar_flags, flags_to_set)
gboolean remote_requires_reset
Definition: nodes.h:113
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: scheduler.h:231
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
pcmk_resource_t * parent
Definition: resources.h:409
pcmk_node_t * node
Definition: actions.h:341
#define crm_debug(fmt, args...)
Definition: logging.h:402
Actions are ordered (optionally, if no other flags are set)
#define pcmk__clear_action_flags(action, flags_to_clear)
pcmk_node_t * pcmk__find_node_in_list(const GList *nodes, const char *node_name)
Definition: nodes.c:150
pcmk_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition: status.c:492
char * task
Definition: actions.h:343
GList * actions_after
Definition: actions.h:371
#define crm_trace(fmt, args...)
Definition: logging.h:404
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
GHashTable * meta
Definition: actions.h:354
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:94
struct pe_node_shared_s * details
Definition: nodes.h:168
#define PCMK_ACTION_START
Definition: actions.h:72
unsigned long long flags
Definition: resources.h:428
const char * uname
Definition: nodes.h:74
Ordering applies only if 'first' is required and on same node as 'then'.
#define PCMK_ACTION_STOP
Definition: actions.h:75
GList * actions
Definition: resources.h:444
#define PCMK_ACTION_STONITH
Definition: actions.h:74
#define PCMK_VALUE_TRUE
Definition: options.h:215
char * uuid
Definition: actions.h:344
void pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:196
void pcmk__order_probes(pcmk_scheduler_t *scheduler)
enum pe_obj_types variant
Definition: resources.h:410
bool(* create_probe)(pcmk_resource_t *rsc, pcmk_node_t *node)
gboolean order_actions(pcmk_action_t *first, pcmk_action_t *then, uint32_t flags)
Definition: utils.c:457
int rsc_discover_mode
Definition: nodes.h:171
pcmk_resource_t rsc1
Service safely stopped.
Definition: results.h:260
const char * id
Definition: nodes.h:73
pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler)
Create or update an action object.
Definition: pe_actions.c:1129
#define pcmk__assert(expr)
void pe__add_action_expected_result(pcmk_action_t *action, int expected_result)
Definition: pe_actions.c:1830
gboolean rsc_discovery_enabled
Definition: nodes.h:107
Cluster status and scheduling.
gboolean is_remote_node
Definition: resources.h:431
GList * ordering_constraints
Definition: scheduler.h:233
pcmk_scheduler_t * scheduler
GList * find_actions(GList *input, const char *key, const pcmk_node_t *on_node)
Definition: pe_actions.c:1480
Relation applies only if 'first' cannot be part of a live migration.
#define PCMK__META_OP_NO_WAIT
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:118
bool pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
pcmk_node_t * allocated_to
Definition: resources.h:447
#define PCMK_ACTION_PROMOTE
Definition: actions.h:66
GList * running_on
Definition: resources.h:456
enum pe_action_flags flags
Definition: actions.h:349
#define CRM_OP_PROBED
Definition: crm.h:135
#define pcmk__set_action_flags(action, flags_to_set)
gboolean crm_is_true(const char *s)
Definition: strings.c:500
pcmk_resource_t * rsc
Definition: actions.h:340
Resource role is unknown.
Definition: roles.h:35
unsigned long long flags
Definition: scheduler.h:211
enum pe_ordering type
Definition: actions.h:317
gboolean unclean
Definition: nodes.h:92
gboolean online
Definition: nodes.h:81
uint64_t flags
Definition: remote.c:215
GList * actions_before
Definition: actions.h:370
int required_runnable_before
Definition: actions.h:367
G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pcmk_node_t *node)
pcmk_resource_t * remote_rsc
Definition: nodes.h:136
No relation (compare with equality rather than bit set)
GHashTable * allowed_nodes
Definition: resources.h:462
Where resource is running.