pacemaker  2.1.6-802a72226b
Scalable High-Availability cluster resource manager
pcmk_sched_probes.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
27 static void
28 add_expected_result(pe_action_t *probe, const pe_resource_t *rsc,
29  const pe_node_t *node)
30 {
31  // Check whether resource is currently active on node
32  pe_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
33 
34  // The expected result is what we think the resource's current state is
35  if (running == NULL) {
37 
38  } else if (rsc->role == RSC_ROLE_PROMOTED) {
40  }
41 }
42 
52 bool
54 {
55  bool any_created = false;
56 
57  for (GList *iter = rscs; iter != NULL; iter = iter->next) {
58  pe_resource_t *rsc = (pe_resource_t *) iter->data;
59 
60  if (rsc->cmds->create_probe(rsc, node)) {
61  any_created = true;
62  }
63  }
64  return any_created;
65 }
66 
74 static void
75 probe_then_start(pe_resource_t *rsc1, pe_resource_t *rsc2)
76 {
77  if ((rsc1->allocated_to != NULL)
78  && (g_hash_table_lookup(rsc1->known_on,
79  rsc1->allocated_to->details->id) == NULL)) {
80 
82  rsc2, pcmk__op_key(rsc2->id, RSC_START, 0), NULL,
84  }
85 }
86 
95 static bool
96 guest_resource_will_stop(const pe_node_t *node)
97 {
98  const pe_resource_t *guest_rsc = node->details->remote_rsc->container;
99 
100  /* Ideally, we'd check whether the guest has a required stop, but that
101  * information doesn't exist yet, so approximate it ...
102  */
103  return node->details->remote_requires_reset
104  || node->details->unclean
105  || pcmk_is_set(guest_rsc->flags, pe_rsc_failed)
106  || (guest_rsc->next_role == RSC_ROLE_STOPPED)
107 
108  // Guest is moving
109  || ((guest_rsc->role > RSC_ROLE_STOPPED)
110  && (guest_rsc->allocated_to != NULL)
111  && (pe_find_node(guest_rsc->running_on,
112  guest_rsc->allocated_to->details->uname) == NULL));
113 }
114 
124 static pe_action_t *
125 probe_action(pe_resource_t *rsc, pe_node_t *node)
126 {
127  pe_action_t *probe = NULL;
128  char *key = pcmk__op_key(rsc->id, RSC_STATUS, 0);
129 
130  crm_debug("Scheduling probe of %s %s on %s",
131  role2text(rsc->role), rsc->id, pe__node_name(node));
132 
133  probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE,
134  rsc->cluster);
136 
137  pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional);
138  add_expected_result(probe, rsc, node);
139  return probe;
140 }
141 
153 bool
155 {
156  uint32_t flags = pe_order_optional;
157  pe_action_t *probe = NULL;
158  pe_node_t *allowed = NULL;
159  pe_resource_t *top = uber_parent(rsc);
160  const char *reason = NULL;
161 
162  CRM_CHECK((rsc != NULL) && (node != NULL), return false);
163 
165  reason = "start-up probes are disabled";
166  goto no_probe;
167  }
168 
169  if (pe__is_guest_or_remote_node(node)) {
170  const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
171 
172  if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
173  reason = "Pacemaker Remote nodes cannot run stonith agents";
174  goto no_probe;
175 
176  } else if (pe__is_guest_node(node)
177  && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
178  reason = "guest nodes cannot run resources containing guest nodes";
179  goto no_probe;
180 
181  } else if (rsc->is_remote_node) {
182  reason = "Pacemaker Remote nodes cannot host remote connections";
183  goto no_probe;
184  }
185  }
186 
187  // If this is a collective resource, probes are created for its children
188  if (rsc->children != NULL) {
189  return pcmk__probe_resource_list(rsc->children, node);
190  }
191 
192  if ((rsc->container != NULL) && !rsc->is_remote_node) {
193  reason = "resource is inside a container";
194  goto no_probe;
195 
196  } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
197  reason = "resource is orphaned";
198  goto no_probe;
199 
200  } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
201  reason = "resource state is already known";
202  goto no_probe;
203  }
204 
205  allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
206 
207  if (rsc->exclusive_discover || top->exclusive_discover) {
208  // Exclusive discovery is enabled ...
209 
210  if (allowed == NULL) {
211  // ... but this node is not allowed to run the resource
212  reason = "resource has exclusive discovery but is not allowed "
213  "on node";
214  goto no_probe;
215 
216  } else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
217  // ... but no constraint marks this node for discovery of resource
218  reason = "resource has exclusive discovery but is not enabled "
219  "on node";
220  goto no_probe;
221  }
222  }
223 
224  if (allowed == NULL) {
225  allowed = node;
226  }
227  if (allowed->rsc_discover_mode == pe_discover_never) {
228  reason = "node has discovery disabled";
229  goto no_probe;
230  }
231 
232  if (pe__is_guest_node(node)) {
233  pe_resource_t *guest = node->details->remote_rsc->container;
234 
235  if (guest->role == RSC_ROLE_STOPPED) {
236  // The guest is stopped, so we know no resource is active there
237  reason = "node's guest is stopped";
238  probe_then_start(guest, top);
239  goto no_probe;
240 
241  } else if (guest_resource_will_stop(node)) {
242  reason = "node's guest will stop";
243 
244  // Order resource start after guest stop (in case it's restarting)
245  pcmk__new_ordering(guest, pcmk__op_key(guest->id, RSC_STOP, 0),
246  NULL, top, pcmk__op_key(top->id, RSC_START, 0),
247  NULL, pe_order_optional, rsc->cluster);
248  goto no_probe;
249  }
250  }
251 
252  // We've eliminated all cases where a probe is not needed, so now it is
253  probe = probe_action(rsc, node);
254 
255  /* Below, we will order the probe relative to start or reload. If this is a
256  * clone instance, the start or reload is for the entire clone rather than
257  * just the instance. Otherwise, the start or reload is for the resource
258  * itself.
259  */
260  if (!pe_rsc_is_clone(top)) {
261  top = rsc;
262  }
263 
264  /* Prevent a start if the resource can't be probed, but don't cause the
265  * resource or entire clone to stop if already active.
266  */
267  if (!pcmk_is_set(probe->flags, pe_action_runnable)
268  && (top->running_on == NULL)) {
270  }
271 
272  // Start or reload after probing the resource
273  pcmk__new_ordering(rsc, NULL, probe,
274  top, pcmk__op_key(top->id, RSC_START, 0), NULL,
275  flags, rsc->cluster);
276  pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
277  pe_order_optional, rsc->cluster);
278 
279  return true;
280 
281 no_probe:
282  pe_rsc_trace(rsc,
283  "Skipping probe for %s on %s because %s",
284  rsc->id, node->details->id, reason);
285  return false;
286 }
287 
297 static bool
298 probe_needed_before_action(const pe_action_t *probe, const pe_action_t *then)
299 {
300  // Probes on a node are performed after unfencing it, not before
301  if (pcmk__str_eq(then->task, CRM_OP_FENCE, pcmk__str_casei)
302  && (probe->node != NULL) && (then->node != NULL)
303  && (probe->node->details == then->node->details)) {
304  const char *op = g_hash_table_lookup(then->meta, "stonith_action");
305 
306  if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
307  return false;
308  }
309  }
310 
311  // Probes should be done on a node before shutting it down
312  if (pcmk__str_eq(then->task, CRM_OP_SHUTDOWN, pcmk__str_none)
313  && (probe->node != NULL) && (then->node != NULL)
314  && (probe->node->details != then->node->details)) {
315  return false;
316  }
317 
318  // Otherwise probes should always be done before any other action
319  return true;
320 }
321 
335 static void
336 add_probe_orderings_for_stops(pe_working_set_t *data_set)
337 {
338  for (GList *iter = data_set->ordering_constraints; iter != NULL;
339  iter = iter->next) {
340 
341  pe__ordering_t *order = iter->data;
342  uint32_t order_flags = pe_order_optional;
343  GList *probes = NULL;
344  GList *then_actions = NULL;
345 
346  // Skip disabled orderings
347  if (order->flags == pe_order_none) {
348  continue;
349  }
350 
351  // Skip non-resource orderings, and orderings for the same resource
352  if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
353  continue;
354  }
355 
356  // Skip invalid orderings (shouldn't be possible)
357  if (((order->lh_action == NULL) && (order->lh_action_task == NULL)) ||
358  ((order->rh_action == NULL) && (order->rh_action_task == NULL))) {
359  continue;
360  }
361 
362  // Skip orderings for first actions other than stop
363  if ((order->lh_action != NULL)
364  && !pcmk__str_eq(order->lh_action->task, RSC_STOP, pcmk__str_none)) {
365  continue;
366  } else if ((order->lh_action == NULL)
367  && !pcmk__ends_with(order->lh_action_task, "_" RSC_STOP "_0")) {
368  continue;
369  }
370 
371  /* Do not imply a probe ordering for a resource inside of a stopping
372  * container. Otherwise, it might introduce a transition loop, since a
373  * probe could be scheduled after the container starts again.
374  */
375  if ((order->rh_rsc != NULL)
376  && (order->lh_rsc->container == order->rh_rsc)) {
377 
378  if ((order->rh_action != NULL)
379  && pcmk__str_eq(order->rh_action->task, RSC_STOP,
380  pcmk__str_none)) {
381  continue;
382  } else if ((order->rh_action == NULL)
384  "_" RSC_STOP "_0")) {
385  continue;
386  }
387  }
388 
389  // Preserve certain order options for future filtering
391  pe__set_order_flags(order_flags,
393  }
394  if (pcmk_is_set(order->flags, pe_order_same_node)) {
396  }
397 
398  // Preserve certain order types for future filtering
399  if ((order->flags == pe_order_anti_colocation)
400  || (order->flags == pe_order_load)) {
401  order_flags = order->flags;
402  }
403 
404  // List all scheduled probes for the first resource
405  probes = pe__resource_actions(order->lh_rsc, NULL, RSC_STATUS, FALSE);
406  if (probes == NULL) { // There aren't any
407  continue;
408  }
409 
410  // List all relevant "then" actions
411  if (order->rh_action != NULL) {
412  then_actions = g_list_prepend(NULL, order->rh_action);
413 
414  } else if (order->rh_rsc != NULL) {
415  then_actions = find_actions(order->rh_rsc->actions,
416  order->rh_action_task, NULL);
417  if (then_actions == NULL) { // There aren't any
418  g_list_free(probes);
419  continue;
420  }
421  }
422 
423  crm_trace("Implying 'probe then' orderings for '%s then %s' "
424  "(id=%d, type=%.6x)",
425  order->lh_action? order->lh_action->uuid : order->lh_action_task,
426  order->rh_action? order->rh_action->uuid : order->rh_action_task,
427  order->id, order->flags);
428 
429  for (GList *probe_iter = probes; probe_iter != NULL;
430  probe_iter = probe_iter->next) {
431 
432  pe_action_t *probe = (pe_action_t *) probe_iter->data;
433 
434  for (GList *then_iter = then_actions; then_iter != NULL;
435  then_iter = then_iter->next) {
436 
437  pe_action_t *then = (pe_action_t *) then_iter->data;
438 
439  if (probe_needed_before_action(probe, then)) {
440  order_actions(probe, then, order_flags);
441  }
442  }
443  }
444 
445  g_list_free(then_actions);
446  g_list_free(probes);
447  }
448 }
449 
460 static void
461 add_start_orderings_for_probe(pe_action_t *probe, pe_action_wrapper_t *after)
462 {
464 
465  /* Although the ordering between the probe of the clone instance and the
466  * start of its parent has been added in pcmk__probe_rsc_on_node(), we
467  * avoided enforcing `pe_order_runnable_left` order type for that as long as
468  * any of the clone instances are running to prevent them from being
469  * unexpectedly stopped.
470  *
471  * On the other hand, we still need to prevent any inactive instances from
472  * starting unless the probe is runnable so that we don't risk starting too
473  * many instances before we know the state on all nodes.
474  */
475  if (after->action->rsc->variant <= pe_group
477  // The order type is already enforced for its parent.
479  || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
480  || !pcmk__str_eq(after->action->task, RSC_START, pcmk__str_none)) {
481  return;
482  }
483 
484  crm_trace("Adding probe start orderings for '%s@%s (%s) "
485  "then instances of %s@%s'",
486  probe->uuid, pe__node_name(probe->node),
487  pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
488  after->action->uuid, pe__node_name(after->action->node));
489 
490  for (GList *then_iter = after->action->actions_after; then_iter != NULL;
491  then_iter = then_iter->next) {
492 
493  pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
494 
495  if (then->action->rsc->running_on
496  || (pe__const_top_resource(then->action->rsc, false)
497  != after->action->rsc)
498  || !pcmk__str_eq(then->action->task, RSC_START, pcmk__str_none)) {
499  continue;
500  }
501 
502  crm_trace("Adding probe start ordering for '%s@%s (%s) "
503  "then %s@%s' (type=%#.6x)",
504  probe->uuid, pe__node_name(probe->node),
505  pcmk_is_set(probe->flags, pe_action_runnable)? "runnable" : "unrunnable",
506  then->action->uuid, pe__node_name(then->action->node),
507  flags);
508 
509  /* Prevent the instance from starting if the instance can't, but don't
510  * cause any other intances to stop if already active.
511  */
512  order_actions(probe, then->action, flags);
513  }
514 
515  return;
516 }
517 
531 static void
532 add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after,
534 {
535  GList *iter = NULL;
536  bool interleave = false;
537  pe_resource_t *compatible_rsc = NULL;
538 
539  // Validate that this is a resource probe followed by some action
540  if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
541  || (probe->rsc->variant != pe_native)
542  || !pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
543  return;
544  }
545 
546  // Avoid running into any possible loop
547  if (pcmk_is_set(after->flags, pe_action_tracking)) {
548  return;
549  }
551 
552  crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
553  probe->uuid, pe__node_name(probe->node),
554  after->uuid, pe__node_name(after->node));
555 
556  /* Add restart orderings if "then" is for a different primitive.
557  * Orderings for collective resources will be added later.
558  */
559  if ((after->rsc != NULL) && (after->rsc->variant == pe_native)
560  && (probe->rsc != after->rsc)) {
561 
562  GList *then_actions = NULL;
563 
564  if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
565  then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP,
566  FALSE);
567 
568  } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
569  then_actions = pe__resource_actions(after->rsc, NULL,
570  RSC_DEMOTE, FALSE);
571  }
572 
573  for (iter = then_actions; iter != NULL; iter = iter->next) {
574  pe_action_t *then = (pe_action_t *) iter->data;
575 
576  // Skip pseudo-actions (for example, those implied by fencing)
577  if (!pcmk_is_set(then->flags, pe_action_pseudo)) {
578  order_actions(probe, then, pe_order_optional);
579  }
580  }
581  g_list_free(then_actions);
582  }
583 
584  /* Detect whether "then" is an interleaved clone action. For these, we want
585  * to add orderings only for the relevant instance.
586  */
587  if ((after->rsc != NULL)
588  && (after->rsc->variant > pe_group)) {
589  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
591 
592  interleave = crm_is_true(interleave_s);
593  if (interleave) {
594  compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
595  after->rsc,
597  false);
598  }
599  }
600 
601  /* Now recursively do the same for all actions ordered after "then". This
602  * also handles collective resources since the collective action will be
603  * ordered before its individual instances' actions.
604  */
605  for (iter = after->actions_after; iter != NULL; iter = iter->next) {
606  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data;
607 
608  /* pe_order_implies_then is the reason why a required A.start
609  * implies/enforces B.start to be required too, which is the cause of
610  * B.restart/re-promote.
611  *
612  * Not sure about pe_order_implies_then_on_node though. It's now only
613  * used for unfencing case, which tends to introduce transition
614  * loops...
615  */
616  if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
617  /* The order type between a group/clone and its child such as
618  * B.start-> B_child.start is:
619  * pe_order_implies_first_printed | pe_order_runnable_left
620  *
621  * Proceed through the ordering chain and build dependencies with
622  * its children.
623  */
624  if ((after->rsc == NULL)
625  || (after->rsc->variant < pe_group)
626  || (probe->rsc->parent == after->rsc)
627  || (after_wrapper->action->rsc == NULL)
628  || (after_wrapper->action->rsc->variant > pe_group)
629  || (after->rsc != after_wrapper->action->rsc->parent)) {
630  continue;
631  }
632 
633  /* Proceed to the children of a group or a non-interleaved clone.
634  * For an interleaved clone, proceed only to the relevant child.
635  */
636  if ((after->rsc->variant > pe_group) && interleave
637  && ((compatible_rsc == NULL)
638  || (compatible_rsc != after_wrapper->action->rsc))) {
639  continue;
640  }
641  }
642 
643  crm_trace("Recursively adding probe restart orderings for "
644  "'%s@%s then %s@%s' (type=%#.6x)",
645  after->uuid, pe__node_name(after->node),
646  after_wrapper->action->uuid,
647  pe__node_name(after_wrapper->action->node),
648  after_wrapper->type);
649 
650  add_restart_orderings_for_probe(probe, after_wrapper->action, data_set);
651  }
652 }
653 
660 static void
661 clear_actions_tracking_flag(pe_working_set_t *data_set)
662 {
663  GList *gIter = NULL;
664 
665  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
666  pe_action_t *action = (pe_action_t *) gIter->data;
667 
669  }
670 }
671 
679 static void
680 add_start_restart_orderings_for_rsc(pe_resource_t *rsc,
682 {
683  GList *probes = NULL;
684 
685  // For collective resources, order each instance recursively
686  if (rsc->variant != pe_native) {
687  g_list_foreach(rsc->children,
688  (GFunc) add_start_restart_orderings_for_rsc, data_set);
689  return;
690  }
691 
692  // Find all probes for given resource
693  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
694 
695  // Add probe restart orderings for each probe found
696  for (GList *iter = probes; iter != NULL; iter = iter->next) {
697  pe_action_t *probe = (pe_action_t *) iter->data;
698 
699  for (GList *then_iter = probe->actions_after; then_iter != NULL;
700  then_iter = then_iter->next) {
701 
702  pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
703 
704  add_start_orderings_for_probe(probe, then);
705  add_restart_orderings_for_probe(probe, then->action, data_set);
706  clear_actions_tracking_flag(data_set);
707  }
708  }
709 
710  g_list_free(probes);
711 }
712 
/*!
 * \internal
 * \brief Order probes after the start of resources they depend on (disabled)
 *
 * The whole body is excluded from compilation with "#if 0", so this function
 * currently does nothing and does not use its argument. The disabled code and
 * the rationale for keeping it disabled are retained below for reference.
 *
 * \param[in,out] data_set  Cluster working set (unused while disabled)
 */
static void
order_then_probes(pe_working_set_t *data_set)
{
#if 0
    /* Given an ordering "A then B", we would prefer to wait for A to be started
     * before probing B.
     *
     * For example, if A is a filesystem which B can't even run without, it
     * would be helpful if the author of B's agent could assume that A is
     * running before B.monitor will be called.
     *
     * However, we can't _only_ probe after A is running, otherwise we wouldn't
     * detect the state of B if A could not be started. We can't even do an
     * opportunistic version of this, because B may be moving:
     *
     *   A.stop -> A.start -> B.probe -> B.stop -> B.start
     *
     * and if we add B.stop -> A.stop here, we get a loop:
     *
     *   A.stop -> A.start -> B.probe -> B.stop -> A.stop
     *
     * We could kill the "B.probe -> B.stop" dependency, but that could mean
     * stopping B "too" soon, because B.start must wait for the probe, and
     * we don't want to stop B if we can't start it.
     *
     * We could add the ordering only if A is an anonymous clone with
     * clone-max == node-max (since we'll never be moving it). However, we could
     * still be stopping one instance at the same time as starting another.
     *
     * The complexity of checking for allowed conditions combined with the ever
     * narrowing use case suggests that this code should remain disabled until
     * someone gets smarter.
     */
    for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
        pe_resource_t *rsc = (pe_resource_t *) iter->data;

        pe_action_t *start = NULL;
        GList *actions = NULL;
        GList *probes = NULL;

        actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);

        if (actions) {
            start = actions->data;
            g_list_free(actions);
        }

        if (start == NULL) {
            crm_err("No start action for %s", rsc->id);
            continue;
        }

        probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);

        for (actions = start->actions_before; actions != NULL;
             actions = actions->next) {

            pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;

            pe_action_t *first = before->action;
            pe_resource_t *first_rsc = first->rsc;

            if (first->required_runnable_before) {
                for (GList *clone_actions = first->actions_before;
                     clone_actions != NULL;
                     clone_actions = clone_actions->next) {

                    before = (pe_action_wrapper_t *) clone_actions->data;

                    crm_trace("Testing '%s then %s' for %s",
                              first->uuid, before->action->uuid, start->uuid);

                    CRM_ASSERT(before->action->rsc != NULL);
                    first_rsc = before->action->rsc;
                    break;
                }

            } else if (!pcmk__str_eq(first->task, RSC_START, pcmk__str_none)) {
                crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
            }

            if (first_rsc == NULL) {
                continue;

            } else if (pe__const_top_resource(first_rsc, false)
                       == pe__const_top_resource(start->rsc, false)) {
                crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
                continue;

            } else if (!pe_rsc_is_clone(pe__const_top_resource(first_rsc,
                                                               false))) {
                crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
                continue;
            }

            crm_err("Applying %s before %s %d", first->uuid, start->uuid,
                    pe__const_top_resource(first_rsc, false)->variant);

            for (GList *probe_iter = probes; probe_iter != NULL;
                 probe_iter = probe_iter->next) {

                pe_action_t *probe = (pe_action_t *) probe_iter->data;

                crm_err("Ordering %s before %s", first->uuid, probe->uuid);
                order_actions(first, probe, pe_order_optional);
            }
        }
    }
#endif
}
831 
832 void
834 {
835  // Add orderings for "probe then X"
836  g_list_foreach(data_set->resources,
837  (GFunc) add_start_restart_orderings_for_rsc, data_set);
838  add_probe_orderings_for_stops(data_set);
839 
840  order_then_probes(data_set);
841 }
842 
851 void
853 {
854  // Schedule probes on each node in the cluster as needed
855  for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
856  pe_node_t *node = (pe_node_t *) iter->data;
857  const char *probed = NULL;
858 
859  if (!node->details->online) { // Don't probe offline nodes
860  if (pcmk__is_failed_remote_node(node)) {
861  pe_fence_node(data_set, node,
862  "the connection is unrecoverable", FALSE);
863  }
864  continue;
865 
866  } else if (node->details->unclean) { // ... or nodes that need fencing
867  continue;
868 
869  } else if (!node->details->rsc_discovery_enabled) {
870  // The user requested that probes not be done on this node
871  continue;
872  }
873 
874  /* This is no longer needed for live clusters, since the probe_complete
875  * node attribute will never be in the CIB. However this is still useful
876  * for processing old saved CIBs (< 1.1.14), including the
877  * reprobe-target_rc regression test.
878  */
879  probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
880  if (probed != NULL && crm_is_true(probed) == FALSE) {
881  pe_action_t *probe_op = NULL;
882 
883  probe_op = custom_action(NULL,
885  node->details->uname),
886  CRM_OP_REPROBE, node, FALSE, TRUE,
887  data_set);
890  continue;
891  }
892 
893  // Probe each resource in the cluster on this node, as needed
895  }
896 }
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:235
pe_action_t * lh_action
Definition: internal.h:205
#define RSC_STOP
Definition: crm.h:202
A dumping ground.
GHashTable * known_on
Definition: pe_types.h:399
bool pe__is_guest_or_remote_node(const pe_node_t *node)
Definition: remote.c:41
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:89
#define CRM_OP_FENCE
Definition: crm.h:144
pe_resource_t * container
Definition: pe_types.h:412
G_GNUC_INTERNAL pe_resource_t * pcmk__find_compatible_instance(const pe_resource_t *match_rsc, const pe_resource_t *rsc, enum rsc_role_e role, bool current)
enum rsc_role_e role
Definition: pe_types.h:402
#define CRM_OP_REPROBE
Definition: crm.h:152
GList * children
Definition: pe_types.h:409
resource_alloc_functions_t * cmds
Definition: pe_types.h:359
GList * find_actions(GList *input, const char *key, const pe_node_t *on_node)
Definition: pe_actions.c:1327
Service active and promoted.
Definition: results.h:247
pe_resource_t * rsc
Definition: pe_types.h:433
enum rsc_role_e next_role
Definition: pe_types.h:403
gboolean exclusive_discover
Definition: pe_types.h:377
#define reload_key(rsc)
Definition: internal.h:410
pe_resource_t * remote_rsc
Definition: pe_types.h:253
GHashTable * meta
Definition: pe_types.h:405
void pcmk__schedule_probes(pe_working_set_t *data_set)
const pe_resource_t * pe__const_top_resource(const pe_resource_t *rsc, bool include_bundle)
Definition: complex.c:947
GList * actions
Definition: pe_types.h:187
pe_action_t * rh_action
Definition: internal.h:210
#define RSC_START
Definition: crm.h:199
pe_node_t * allocated_to
Definition: pe_types.h:395
pe_action_t * action
Definition: pe_types.h:557
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:536
gboolean remote_requires_reset
Definition: pe_types.h:247
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: pe_types.h:181
GList * nodes
Definition: pe_types.h:180
const char * role2text(enum rsc_role_e role)
Definition: common.c:450
G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(const pe_node_t *node)
#define pe_rsc_failed
Definition: pe_types.h:292
#define crm_debug(fmt, args...)
Definition: logging.h:382
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:922
pe_resource_t * pe__resource_contains_guest_node(const pe_working_set_t *data_set, const pe_resource_t *rsc)
Definition: remote.c:66
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:496
const char * pe_node_attribute_raw(const pe_node_t *node, const char *name)
Definition: common.c:558
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:159
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
void pe__add_action_expected_result(pe_action_t *action, int expected_result)
Definition: pe_actions.c:1676
char * task
Definition: pe_types.h:437
GList * actions_after
Definition: pe_types.h:471
#define pe__clear_action_flags(action, flags_to_clear)
Definition: internal.h:98
bool pcmk__probe_resource_list(GList *rscs, pe_node_t *node)
#define crm_trace(fmt, args...)
Definition: logging.h:383
void pe_fence_node(pe_working_set_t *data_set, pe_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:113
#define PCMK_RESOURCE_CLASS_STONITH
Definition: agents.h:31
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
GHashTable * meta
Definition: pe_types.h:447
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:121
struct pe_node_shared_s * details
Definition: pe_types.h:268
pe_node_t * node
Definition: pe_types.h:434
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:488
unsigned long long flags
Definition: pe_types.h:373
const char * uname
Definition: pe_types.h:232
pe_working_set_t * data_set
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:427
pe_resource_t * lh_rsc
Definition: internal.h:204
pe_resource_t rsc2
GList * actions
Definition: pe_types.h:391
G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *data_set)
char * uuid
Definition: pe_types.h:438
enum pe_obj_types variant
Definition: pe_types.h:356
int rsc_discover_mode
Definition: pe_types.h:269
Service safely stopped.
Definition: results.h:246
#define CRM_OP_SHUTDOWN
Definition: crm.h:143
const char * id
Definition: pe_types.h:231
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: operations.c:42
gboolean rsc_discovery_enabled
Definition: pe_types.h:246
void pcmk__order_probes(pe_working_set_t *data_set)
bool(* create_probe)(pe_resource_t *rsc, pe_node_t *node)
pe_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition: status.c:448
Cluster status and scheduling.
gboolean is_remote_node
Definition: pe_types.h:376
GList * ordering_constraints
Definition: pe_types.h:183
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:500
bool pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node)
#define crm_err(fmt, args...)
Definition: logging.h:377
#define CRM_ASSERT(expr)
Definition: results.h:42
#define RSC_STATUS
Definition: crm.h:213
#define RSC_PROMOTE
Definition: crm.h:205
pe_resource_t rsc1
pe_node_t * pe_find_node(const GList *node_list, const char *node_name)
Find a node by name in a list of nodes.
Definition: status.c:473
GList * running_on
Definition: pe_types.h:398
enum pe_action_flags flags
Definition: pe_types.h:442
#define CRM_OP_PROBED
Definition: crm.h:151
pe_working_set_t * cluster
Definition: pe_types.h:353
pe_resource_t * rh_rsc
Definition: internal.h:209
G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order)
gboolean crm_is_true(const char *s)
Definition: strings.c:416
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:50
#define pe__set_order_flags(order_flags, flags_to_set)
Definition: internal.h:141
unsigned long long flags
Definition: pe_types.h:169
#define XML_RSC_ATTR_INTERLEAVE
Definition: msg_xml.h:240
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1400
pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, const pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Create or update an action object.
Definition: pe_actions.c:942
enum pe_ordering type
Definition: pe_types.h:555
gboolean unclean
Definition: pe_types.h:240
#define pe_rsc_orphan
Definition: pe_types.h:272
gboolean online
Definition: pe_types.h:236
uint64_t flags
Definition: remote.c:215
pe_resource_t * parent
Definition: pe_types.h:354
#define RSC_DEMOTE
Definition: crm.h:207
#define XML_AGENT_ATTR_CLASS
Definition: msg_xml.h:282
char * id
Definition: pe_types.h:347
GHashTable * allowed_nodes
Definition: pe_types.h:400
#define pe_flag_startup_probes
Definition: pe_types.h:132