pacemaker  2.1.3-ea053b43a
Scalable High-Availability cluster resource manager
pcmk_sched_probes.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2022 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 probe_needed_before_action(pe_action_t *probe, pe_action_t *then)
30 {
31  // Probes on a node are performed after unfencing it, not before
32  if (pcmk__str_eq(then->task, CRM_OP_FENCE, pcmk__str_casei)
33  && (probe->node != NULL) && (then->node != NULL)
34  && (probe->node->details == then->node->details)) {
35  const char *op = g_hash_table_lookup(then->meta, "stonith_action");
36 
37  if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
38  return false;
39  }
40  }
41 
42  // Probes should be done on a node before shutting it down
43  if (pcmk__str_eq(then->task, CRM_OP_SHUTDOWN, pcmk__str_none)
44  && (probe->node != NULL) && (then->node != NULL)
45  && (probe->node->details != then->node->details)) {
46  return false;
47  }
48 
49  // Otherwise probes should always be done before any other action
50  return true;
51 }
52 
66 static void
67 add_probe_orderings_for_stops(pe_working_set_t *data_set)
68 {
69  for (GList *iter = data_set->ordering_constraints; iter != NULL;
70  iter = iter->next) {
71 
72  pe__ordering_t *order = iter->data;
73  enum pe_ordering order_type = pe_order_optional;
74  GList *probes = NULL;
75  GList *then_actions = NULL;
76 
77  // Skip disabled orderings
78  if (order->type == pe_order_none) {
79  continue;
80  }
81 
82  // Skip non-resource orderings, and orderings for the same resource
83  if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
84  continue;
85  }
86 
87  // Skip invalid orderings (shouldn't be possible)
88  if (((order->lh_action == NULL) && (order->lh_action_task == NULL)) ||
89  ((order->rh_action == NULL) && (order->rh_action_task == NULL))) {
90  continue;
91  }
92 
93  // Skip orderings for first actions other than stop
94  if ((order->lh_action != NULL)
95  && !pcmk__str_eq(order->lh_action->task, RSC_STOP, pcmk__str_none)) {
96  continue;
97  } else if ((order->lh_action == NULL)
98  && !pcmk__ends_with(order->lh_action_task, "_" RSC_STOP "_0")) {
99  continue;
100  }
101 
102  /* Do not imply a probe ordering for a resource inside of a stopping
103  * container. Otherwise, it might introduce a transition loop, since a
104  * probe could be scheduled after the container starts again.
105  */
106  if ((order->rh_rsc != NULL)
107  && (order->lh_rsc->container == order->rh_rsc)) {
108 
109  if ((order->rh_action != NULL)
110  && pcmk__str_eq(order->rh_action->task, RSC_STOP,
111  pcmk__str_none)) {
112  continue;
113  } else if ((order->rh_action == NULL)
115  "_" RSC_STOP "_0")) {
116  continue;
117  }
118  }
119 
120  // Preserve certain order options for future filtering
122  pe__set_order_flags(order_type,
124  }
125  if (pcmk_is_set(order->type, pe_order_same_node)) {
127  }
128 
129  // Preserve certain order types for future filtering
130  if ((order->type == pe_order_anti_colocation)
131  || (order->type == pe_order_load)) {
132  order_type = order->type;
133  }
134 
135  // List all scheduled probes for the first resource
136  probes = pe__resource_actions(order->lh_rsc, NULL, RSC_STATUS, FALSE);
137  if (probes == NULL) { // There aren't any
138  continue;
139  }
140 
141  // List all relevant "then" actions
142  if (order->rh_action != NULL) {
143  then_actions = g_list_prepend(NULL, order->rh_action);
144 
145  } else if (order->rh_rsc != NULL) {
146  then_actions = find_actions(order->rh_rsc->actions,
147  order->rh_action_task, NULL);
148  if (then_actions == NULL) { // There aren't any
149  g_list_free(probes);
150  continue;
151  }
152  }
153 
154  crm_trace("Implying 'probe then' orderings for '%s then %s' "
155  "(id=%d, type=%.6x)",
156  order->lh_action? order->lh_action->uuid : order->lh_action_task,
157  order->rh_action? order->rh_action->uuid : order->rh_action_task,
158  order->id, order->type);
159 
160  for (GList *probe_iter = probes; probe_iter != NULL;
161  probe_iter = probe_iter->next) {
162 
163  pe_action_t *probe = (pe_action_t *) probe_iter->data;
164 
165  for (GList *then_iter = then_actions; then_iter != NULL;
166  then_iter = then_iter->next) {
167 
168  pe_action_t *then = (pe_action_t *) then_iter->data;
169 
170  if (probe_needed_before_action(probe, then)) {
171  order_actions(probe, then, order_type);
172  }
173  }
174  }
175 
176  g_list_free(then_actions);
177  g_list_free(probes);
178  }
179 }
180 
194 static void
195 add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after,
197 {
198  GList *iter = NULL;
199  bool interleave = false;
200  pe_resource_t *compatible_rsc = NULL;
201 
202  // Validate that this is a resource probe followed by some action
203  if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
204  || (probe->rsc->variant != pe_native)
205  || !pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
206  return;
207  }
208 
209  // Avoid running into any possible loop
210  if (pcmk_is_set(after->flags, pe_action_tracking)) {
211  return;
212  }
214 
215  crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
216  probe->uuid,
217  ((probe->node == NULL)? "" : probe->node->details->uname),
218  after->uuid,
219  ((after->node == NULL)? "" : after->node->details->uname));
220 
221  /* Add restart orderings if "then" is for a different primitive.
222  * Orderings for collective resources will be added later.
223  */
224  if ((after->rsc != NULL) && (after->rsc->variant == pe_native)
225  && (probe->rsc != after->rsc)) {
226 
227  GList *then_actions = NULL;
228 
229  if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
230  then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP,
231  FALSE);
232 
233  } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
234  then_actions = pe__resource_actions(after->rsc, NULL,
235  RSC_DEMOTE, FALSE);
236  }
237 
238  for (iter = then_actions; iter != NULL; iter = iter->next) {
239  pe_action_t *then = (pe_action_t *) iter->data;
240 
241  // Skip pseudo-actions (for example, those implied by fencing)
242  if (!pcmk_is_set(then->flags, pe_action_pseudo)) {
243  order_actions(probe, then, pe_order_optional);
244  }
245  }
246  g_list_free(then_actions);
247  }
248 
249  /* Detect whether "then" is an interleaved clone action. For these, we want
250  * to add orderings only for the relevant instance.
251  */
252  if ((after->rsc != NULL)
253  && (after->rsc->variant > pe_group)) {
254  const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
256 
257  interleave = crm_is_true(interleave_s);
258  if (interleave) {
259  compatible_rsc = find_compatible_child(probe->rsc,
260  after->rsc,
262  FALSE, data_set);
263  }
264  }
265 
266  /* Now recursively do the same for all actions ordered after "then". This
267  * also handles collective resources since the collective action will be
268  * ordered before its individual instances' actions.
269  */
270  for (iter = after->actions_after; iter != NULL; iter = iter->next) {
271  pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data;
272 
273  /* pe_order_implies_then is the reason why a required A.start
274  * implies/enforces B.start to be required too, which is the cause of
275  * B.restart/re-promote.
276  *
277  * Not sure about pe_order_implies_then_on_node though. It's now only
278  * used for unfencing case, which tends to introduce transition
279  * loops...
280  */
281  if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
282  /* The order type between a group/clone and its child such as
283  * B.start-> B_child.start is:
284  * pe_order_implies_first_printed | pe_order_runnable_left
285  *
286  * Proceed through the ordering chain and build dependencies with
287  * its children.
288  */
289  if ((after->rsc == NULL)
290  || (after->rsc->variant < pe_group)
291  || (probe->rsc->parent == after->rsc)
292  || (after_wrapper->action->rsc == NULL)
293  || (after_wrapper->action->rsc->variant > pe_group)
294  || (after->rsc != after_wrapper->action->rsc->parent)) {
295  continue;
296  }
297 
298  /* Proceed to the children of a group or a non-interleaved clone.
299  * For an interleaved clone, proceed only to the relevant child.
300  */
301  if ((after->rsc->variant > pe_group) && interleave
302  && ((compatible_rsc == NULL)
303  || (compatible_rsc != after_wrapper->action->rsc))) {
304  continue;
305  }
306  }
307 
308  crm_trace("Recursively adding probe restart orderings for "
309  "'%s@%s then %s@%s' (type=%#.6x)",
310  after->uuid,
311  ((after->node == NULL)? "" : after->node->details->uname),
312  after_wrapper->action->uuid,
313  ((after_wrapper->action->node == NULL)? "" : after_wrapper->action->node->details->uname),
314  after_wrapper->type);
315 
316  add_restart_orderings_for_probe(probe, after_wrapper->action, data_set);
317  }
318 }
319 
326 static void
327 clear_actions_tracking_flag(pe_working_set_t *data_set)
328 {
329  GList *gIter = NULL;
330 
331  for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
332  pe_action_t *action = (pe_action_t *) gIter->data;
333 
335  }
336 }
337 
345 static void
346 add_restart_orderings_for_rsc(pe_resource_t *rsc, pe_working_set_t *data_set)
347 {
348  GList *probes = NULL;
349 
350  // For collective resources, order each instance recursively
351  if (rsc->variant != pe_native) {
352  g_list_foreach(rsc->children, (GFunc) add_restart_orderings_for_rsc,
353  data_set);
354  return;
355  }
356 
357  // Find all probes for given resource
358  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
359 
360  // Add probe restart orderings for each probe found
361  for (GList *iter = probes; iter != NULL; iter = iter->next) {
362  pe_action_t *probe = (pe_action_t *) iter->data;
363 
364  for (GList *then_iter = probe->actions_after; then_iter != NULL;
365  then_iter = then_iter->next) {
366 
367  pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
368 
369  add_restart_orderings_for_probe(probe, then->action, data_set);
370  clear_actions_tracking_flag(data_set);
371  }
372  }
373 
374  g_list_free(probes);
375 }
376 
385 static void
386 order_then_probes(pe_working_set_t *data_set)
387 {
388 #if 0
389  /* Given an ordering "A then B", we would prefer to wait for A to be started
390  * before probing B.
391  *
392  * For example, if A is a filesystem which B can't even run without, it
393  * would be helpful if the author of B's agent could assume that A is
394  * running before B.monitor will be called.
395  *
396  * However, we can't _only_ probe after A is running, otherwise we wouldn't
397  * detect the state of B if A could not be started. We can't even do an
398  * opportunistic version of this, because B may be moving:
399  *
400  * A.stop -> A.start -> B.probe -> B.stop -> B.start
401  *
402  * and if we add B.stop -> A.stop here, we get a loop:
403  *
404  * A.stop -> A.start -> B.probe -> B.stop -> A.stop
405  *
406  * We could kill the "B.probe -> B.stop" dependency, but that could mean
407  * stopping B "too" soon, because B.start must wait for the probe, and
408  * we don't want to stop B if we can't start it.
409  *
410  * We could add the ordering only if A is an anonymous clone with
411  * clone-max == node-max (since we'll never be moving it). However, we could
412  * still be stopping one instance at the same time as starting another.
413  *
414  * The complexity of checking for allowed conditions combined with the ever
415  * narrowing use case suggests that this code should remain disabled until
416  * someone gets smarter.
417  */
418  for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
419  pe_resource_t *rsc = (pe_resource_t *) iter->data;
420 
421  pe_action_t *start = NULL;
422  GList *actions = NULL;
423  GList *probes = NULL;
424 
425  actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);
426 
427  if (actions) {
428  start = actions->data;
429  g_list_free(actions);
430  }
431 
432  if (start == NULL) {
433  crm_err("No start action for %s", rsc->id);
434  continue;
435  }
436 
437  probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
438 
439  for (actions = start->actions_before; actions != NULL;
440  actions = actions->next) {
441 
442  pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;
443 
444  pe_action_t *first = before->action;
445  pe_resource_t *first_rsc = first->rsc;
446 
447  if (first->required_runnable_before) {
448  for (GList *clone_actions = first->actions_before;
449  clone_actions != NULL;
450  clone_actions = clone_actions->next) {
451 
452  before = (pe_action_wrapper_t *) clone_actions->data;
453 
454  crm_trace("Testing '%s then %s' for %s",
455  first->uuid, before->action->uuid, start->uuid);
456 
457  CRM_ASSERT(before->action->rsc != NULL);
458  first_rsc = before->action->rsc;
459  break;
460  }
461 
462  } else if (!pcmk__str_eq(first->task, RSC_START, pcmk__str_none)) {
463  crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
464  }
465 
466  if (first_rsc == NULL) {
467  continue;
468 
469  } else if (uber_parent(first_rsc) == uber_parent(start->rsc)) {
470  crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
471  continue;
472 
473  } else if (!pe_rsc_is_clone(uber_parent(first_rsc))) {
474  crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
475  continue;
476  }
477 
478  crm_err("Applying %s before %s %d", first->uuid, start->uuid,
479  uber_parent(first_rsc)->variant);
480 
481  for (GList *probe_iter = probes; probe_iter != NULL;
482  probe_iter = probe_iter->next) {
483 
484  pe_action_t *probe = (pe_action_t *) probe_iter->data;
485 
486  crm_err("Ordering %s before %s", first->uuid, probe->uuid);
487  order_actions(first, probe, pe_order_optional);
488  }
489  }
490  }
491 #endif
492 }
493 
494 void
496 {
497  // Add orderings for "probe then X"
498  g_list_foreach(data_set->resources, (GFunc) add_restart_orderings_for_rsc,
499  data_set);
500  add_probe_orderings_for_stops(data_set);
501 
502  order_then_probes(data_set);
503 }
504 
513 void
515 {
516  // Schedule probes on each node in the cluster as needed
517  for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
518  pe_node_t *node = (pe_node_t *) iter->data;
519  const char *probed = NULL;
520 
521  if (!node->details->online) { // Don't probe offline nodes
522  if (pcmk__is_failed_remote_node(node)) {
523  pe_fence_node(data_set, node,
524  "the connection is unrecoverable", FALSE);
525  }
526  continue;
527 
528  } else if (node->details->unclean) { // ... or nodes that need fencing
529  continue;
530 
531  } else if (!node->details->rsc_discovery_enabled) {
532  // The user requested that probes not be done on this node
533  continue;
534  }
535 
536  /* This is no longer needed for live clusters, since the probe_complete
537  * node attribute will never be in the CIB. However this is still useful
538  * for processing old saved CIBs (< 1.1.14), including the
539  * reprobe-target_rc regression test.
540  */
541  probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
542  if (probed != NULL && crm_is_true(probed) == FALSE) {
543  pe_action_t *probe_op = NULL;
544 
545  probe_op = custom_action(NULL,
547  node->details->uname),
548  CRM_OP_REPROBE, node, FALSE, TRUE,
549  data_set);
552  continue;
553  }
554 
555  // Probe each resource in the cluster on this node, as needed
556  for (GList *rsc_iter = data_set->resources; rsc_iter != NULL;
557  rsc_iter = rsc_iter->next) {
558  pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data;
559 
560  rsc->cmds->create_probe(rsc, node, NULL, FALSE, data_set);
561  }
562  }
563 }
pe_action_t * lh_action
Definition: internal.h:185
#define RSC_STOP
Definition: crm.h:204
A dumping ground.
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:61
#define CRM_OP_FENCE
Definition: crm.h:145
pe_resource_t * container
Definition: pe_types.h:394
#define CRM_OP_REPROBE
Definition: crm.h:154
GList * children
Definition: pe_types.h:391
resource_alloc_functions_t * cmds
Definition: pe_types.h:348
GList * find_actions(GList *input, const char *key, const pe_node_t *on_node)
Definition: utils.c:1571
pe_resource_t * rsc
Definition: pe_types.h:424
G_GNUC_INTERNAL bool pcmk__is_failed_remote_node(pe_node_t *node)
GHashTable * meta
Definition: pe_types.h:387
void pcmk__schedule_probes(pe_working_set_t *data_set)
GList * actions
Definition: pe_types.h:171
pe_action_t * rh_action
Definition: internal.h:190
#define RSC_START
Definition: crm.h:201
pe_action_t * action
Definition: pe_types.h:548
bool pcmk__ends_with(const char *s, const char *match)
Definition: strings.c:536
const char * action
Definition: pcmk_fence.c:29
GList * resources
Definition: pe_types.h:165
GList * nodes
Definition: pe_types.h:164
gboolean(* create_probe)(pe_resource_t *, pe_node_t *, pe_action_t *, gboolean, pe_working_set_t *)
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:913
#define XML_BOOLEAN_TRUE
Definition: msg_xml.h:146
char * task
Definition: pe_types.h:428
GList * actions_after
Definition: pe_types.h:462
#define pe__clear_action_flags(action, flags_to_clear)
Definition: internal.h:70
#define crm_trace(fmt, args...)
Definition: logging.h:364
void pe_fence_node(pe_working_set_t *data_set, pe_node_t *node, const char *reason, bool priority_delay)
Schedule a fence action for a node.
Definition: unpack.c:95
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
GHashTable * meta
Definition: pe_types.h:438
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:122
struct pe_node_shared_s * details
Definition: pe_types.h:252
pe_node_t * node
Definition: pe_types.h:425
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1906
const char * uname
Definition: pe_types.h:216
pe_working_set_t * data_set
#define XML_ATTR_TE_NOWAIT
Definition: msg_xml.h:407
pe_resource_t * lh_rsc
Definition: internal.h:184
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:560
GList * actions
Definition: pe_types.h:373
enum pe_ordering type
Definition: internal.h:181
char * uuid
Definition: pe_types.h:429
enum pe_obj_types variant
Definition: pe_types.h:345
#define CRM_OP_SHUTDOWN
Definition: crm.h:144
pe_resource_t * find_compatible_child(pe_resource_t *local_child, pe_resource_t *rsc, enum rsc_role_e filter, gboolean current, pe_working_set_t *data_set)
gboolean rsc_discovery_enabled
Definition: pe_types.h:230
void pcmk__order_probes(pe_working_set_t *data_set)
Cluster status and scheduling.
GList * ordering_constraints
Definition: pe_types.h:167
void add_hash_param(GHashTable *hash, const char *name, const char *value)
Definition: common.c:504
#define crm_err(fmt, args...)
Definition: logging.h:358
#define CRM_ASSERT(expr)
Definition: results.h:42
#define RSC_STATUS
Definition: crm.h:215
#define RSC_PROMOTE
Definition: crm.h:207
enum pe_action_flags flags
Definition: pe_types.h:433
#define CRM_OP_PROBED
Definition: crm.h:153
pe_resource_t * rh_rsc
Definition: internal.h:189
gboolean crm_is_true(const char *s)
Definition: strings.c:416
#define pe__set_order_flags(order_flags, flags_to_set)
Definition: internal.h:113
#define XML_RSC_ATTR_INTERLEAVE
Definition: msg_xml.h:230
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: utils.c:1644
enum pe_ordering type
Definition: pe_types.h:546
gboolean unclean
Definition: pe_types.h:224
pe_ordering
Definition: pe_types.h:497
gboolean online
Definition: pe_types.h:220
pe_resource_t * parent
Definition: pe_types.h:343
#define RSC_DEMOTE
Definition: crm.h:209
char * id
Definition: pe_types.h:336
pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, pe_node_t *on_node, gboolean optional, gboolean foo, pe_working_set_t *data_set)
Create or update an action object.
Definition: utils.c:730