pacemaker  3.0.0-d8340737c4
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 rsc_is_known_on(const pcmk_resource_t *rsc, const pcmk_node_t *node)
30 {
31  const pcmk_resource_t *parent = rsc->priv->parent;
32 
33  if (g_hash_table_lookup(rsc->priv->probed_nodes,
34  node->priv->id) != NULL) {
35  return TRUE;
36 
37  } else if (pcmk__is_primitive(rsc) && pcmk__is_anonymous_clone(parent)
38  && (g_hash_table_lookup(parent->priv->probed_nodes,
39  node->priv->id) != NULL)) {
40  /* We check only the parent, not the uber-parent, because we cannot
41  * assume that the resource is known if it is in an anonymously cloned
42  * group (which may be only partially known).
43  */
44  return TRUE;
45  }
46  return FALSE;
47 }
48 
56 static void
57 order_start_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
58 {
60 
61  CRM_CHECK(stonith_op && stonith_op->node, return);
62  target = stonith_op->node;
63 
64  for (GList *iter = rsc->priv->actions; iter != NULL; iter = iter->next) {
65  pcmk_action_t *action = iter->data;
66 
67  switch (action->needs) {
69  // Anything other than start or promote requires nothing
70  break;
71 
74  break;
75 
77  if (pcmk__str_eq(action->task, PCMK_ACTION_START,
79  && (g_hash_table_lookup(rsc->priv->allowed_nodes,
80  target->priv->id) != NULL)
81  && !rsc_is_known_on(rsc, target)) {
82 
83  /* If we don't know the status of the resource on the node
84  * we're about to shoot, we have to assume it may be active
85  * there. Order the resource start after the fencing. This
86  * is analogous to waiting for all the probes for a resource
87  * to complete before starting it.
88  *
89  * The most likely explanation is that the DC died and took
90  * its status with it.
91  */
92  pcmk__rsc_debug(rsc, "Ordering %s after %s recovery",
93  action->uuid, pcmk__node_name(target));
94  order_actions(stonith_op, action,
97  }
98  break;
99  }
100  }
101 }
102 
110 static void
111 order_stop_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
112 {
113  GList *iter = NULL;
114  GList *action_list = NULL;
115  bool order_implicit = false;
116 
117  pcmk_resource_t *top = uber_parent(rsc);
118  pcmk_action_t *parent_stop = NULL;
120 
121  CRM_CHECK(stonith_op && stonith_op->node, return);
122  target = stonith_op->node;
123 
124  /* Get a list of stop actions potentially implied by the fencing */
125  action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE);
126 
127  /* If resource requires fencing, implicit actions must occur after fencing.
128  *
129  * Implied stops and demotes of resources running on guest nodes are always
130  * ordered after fencing, even if the resource does not require fencing,
131  * because guest node "fencing" is actually just a resource stop.
132  */
134  || pcmk__is_guest_or_bundle_node(target)) {
135 
136  order_implicit = true;
137  }
138 
139  if (action_list && order_implicit) {
140  parent_stop = find_first_action(top->priv->actions, NULL,
141  PCMK_ACTION_STOP, NULL);
142  }
143 
144  for (iter = action_list; iter != NULL; iter = iter->next) {
145  pcmk_action_t *action = iter->data;
146 
147  // The stop would never complete, so convert it into a pseudo-action.
150 
151  if (order_implicit) {
152  /* Order the stonith before the parent stop (if any).
153  *
154  * Also order the stonith before the resource stop, unless the
155  * resource is inside a bundle -- that would cause a graph loop.
156  * We can rely on the parent stop's ordering instead.
157  *
158  * User constraints must not order a resource in a guest node
159  * relative to the guest node container resource. The
160  * pcmk__ar_guest_allowed flag marks constraints as generated by the
161  * cluster and thus immune to that check (and is irrelevant if
162  * target is not a guest).
163  */
164  if (!pcmk__is_bundled(rsc)) {
166  }
167  order_actions(stonith_op, parent_stop, pcmk__ar_guest_allowed);
168  }
169 
170  if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
171  crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
172  rsc->id, (order_implicit? "after" : "because"),
173  pcmk__node_name(target));
174  } else {
175  crm_info("%s is implicit %s %s is fenced",
176  action->uuid, (order_implicit? "after" : "because"),
177  pcmk__node_name(target));
178  }
179 
180  if (pcmk_is_set(rsc->flags, pcmk__rsc_notify)) {
181  pe__order_notifs_after_fencing(action, rsc, stonith_op);
182  }
183 
184 #if 0
185  /* It might be a good idea to stop healthy resources on a node about to
186  * be fenced, when possible.
187  *
188  * However, fencing must be done before a failed resource's
189  * (pseudo-)stop action, so that could create a loop. For example, given
190  * a group of A and B running on node N with a failed stop of B:
191  *
192  * fence N -> stop B (pseudo-op) -> stop A -> fence N
193  *
194  * The block below creates the stop A -> fence N ordering and therefore
195  * must (at least for now) be disabled. Instead, run the block above and
196  * treat all resources on N as B would be (i.e., as a pseudo-op after
197  * the fencing).
198  *
199  * @TODO Maybe break the "A requires B" dependency in
200  * pcmk__update_action_for_orderings() and use this block for healthy
201  * resources instead of the above.
202  */
203  crm_info("Moving healthy resource %s off %s before fencing",
204  rsc->id, pcmk__node_name(node));
205  pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
206  strdup(PCMK_ACTION_STONITH), stonith_op,
207  pcmk__ar_ordered, rsc->private->scheduler);
208 #endif
209  }
210 
211  g_list_free(action_list);
212 
213  /* Get a list of demote actions potentially implied by the fencing */
214  action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE);
215 
216  for (iter = action_list; iter != NULL; iter = iter->next) {
217  pcmk_action_t *action = iter->data;
218 
219  if (!(action->node->details->online) || action->node->details->unclean
220  || pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
221 
222  if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
223  pcmk__rsc_info(rsc,
224  "Demote of failed resource %s is implicit "
225  "after %s is fenced",
226  rsc->id, pcmk__node_name(target));
227  } else {
228  pcmk__rsc_info(rsc, "%s is implicit after %s is fenced",
229  action->uuid, pcmk__node_name(target));
230  }
231 
232  /* The demote would never complete and is now implied by the
233  * fencing, so convert it into a pseudo-action.
234  */
237 
238  if (pcmk__is_bundled(rsc)) {
239  // Recovery will be ordered as usual after parent's implied stop
240 
241  } else if (order_implicit) {
242  order_actions(stonith_op, action,
244  }
245  }
246  }
247 
248  g_list_free(action_list);
249 }
250 
258 static void
259 rsc_stonith_ordering(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
260 {
261  if (rsc->priv->children != NULL) {
262 
263  for (GList *iter = rsc->priv->children;
264  iter != NULL; iter = iter->next) {
265 
266  pcmk_resource_t *child_rsc = iter->data;
267 
268  rsc_stonith_ordering(child_rsc, stonith_op);
269  }
270 
271  } else if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
272  pcmk__rsc_trace(rsc,
273  "Skipping fencing constraints for unmanaged resource: "
274  "%s", rsc->id);
275 
276  } else {
277  order_start_vs_fencing(rsc, stonith_op);
278  order_stop_vs_fencing(rsc, stonith_op);
279  }
280 }
281 
293 void
295 {
296  CRM_CHECK(stonith_op && scheduler, return);
297  for (GList *r = scheduler->priv->resources; r != NULL; r = r->next) {
298  rsc_stonith_ordering((pcmk_resource_t *) r->data, stonith_op);
299  }
300 }
301 
311 void
314  enum pcmk__action_relation_flags order)
315 {
316  /* When unfencing is in use, we order unfence actions before any probe or
317  * start of resources that require unfencing, and also of fence devices.
318  *
319  * This might seem to violate the principle that fence devices require
320  * only quorum. However, fence agents that unfence often don't have enough
321  * information to even probe or start unless the node is first unfenced.
322  */
324  && pcmk_is_set(rsc->priv->scheduler->flags,
327 
328  /* Start with an optional ordering. Requiring unfencing would result in
329  * the node being unfenced, and all its resources being stopped,
330  * whenever a new resource is added -- which would be highly suboptimal.
331  */
332  pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL,
333  FALSE, node->priv->scheduler);
334 
335  order_actions(unfence, action, order);
336 
337  if (!pcmk__node_unfenced(node)) {
338  // But unfencing is required if it has never been done
339  char *reason = crm_strdup_printf("required by %s %s",
340  rsc->id, action->task);
341 
342  trigger_unfencing(NULL, node, reason, NULL,
343  node->priv->scheduler);
344  free(reason);
345  }
346  }
347 }
348 
355 void
357 {
358  pcmk_resource_t *launcher = NULL;
359  pcmk_action_t *stop = NULL;
360  pcmk_action_t *stonith_op = NULL;
361 
362  /* The fence action is just a label; we don't do anything differently for
363  * off vs. reboot. We specify it explicitly, rather than let it default to
364  * cluster's default action, because we are not _initiating_ fencing -- we
365  * are creating a pseudo-event to describe fencing that is already occurring
366  * by other means (launcher recovery).
367  */
368  const char *fence_action = PCMK_ACTION_OFF;
369 
370  pcmk__assert(node != NULL);
371 
372  /* Check whether guest's launcher has any explicit stop or start (the stop
373  * may be implied by fencing of the guest's host).
374  */
375  launcher = node->priv->remote->priv->launcher;
376  if (launcher != NULL) {
377  stop = find_first_action(launcher->priv->actions, NULL,
378  PCMK_ACTION_STOP, NULL);
379 
380  if (find_first_action(launcher->priv->actions, NULL,
381  PCMK_ACTION_START, NULL)) {
382  fence_action = PCMK_ACTION_REBOOT;
383  }
384  }
385 
386  /* Create a fence pseudo-event, so we have an event to order actions
387  * against, and the controller can always detect it.
388  */
389  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
390  FALSE, node->priv->scheduler);
391  pcmk__set_action_flags(stonith_op,
393 
394  /* We want to imply stops/demotes after the guest is stopped, not wait until
395  * it is restarted, so we always order pseudo-fencing after stop, not start
396  * (even though start might be closer to what is done for a real reboot).
397  */
398  if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk__action_pseudo)) {
399  pcmk_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
400  NULL, FALSE,
401  node->priv->scheduler);
402 
403  crm_info("Implying guest %s is down (action %d) after %s fencing",
404  pcmk__node_name(node), stonith_op->id,
405  pcmk__node_name(stop->node));
406  order_actions(parent_stonith_op, stonith_op,
409 
410  } else if (stop) {
411  order_actions(stop, stonith_op,
414  crm_info("Implying guest %s is down (action %d) "
415  "after launcher %s is stopped (action %d)",
416  pcmk__node_name(node), stonith_op->id,
417  launcher->id, stop->id);
418  } else {
419  /* If we're fencing the guest node but there's no stop for the guest
420  * resource, we must think the guest is already stopped. However, we may
421  * think so because its resource history was just cleaned. To avoid
422  * unnecessarily considering the guest node down if it's really up,
423  * order the pseudo-fencing after any stop of the connection resource,
424  * which will be ordered after any launcher (re-)probe.
425  */
426  stop = find_first_action(node->priv->remote->priv->actions,
427  NULL, PCMK_ACTION_STOP, NULL);
428 
429  if (stop) {
430  order_actions(stop, stonith_op, pcmk__ar_ordered);
431  crm_info("Implying guest %s is down (action %d) "
432  "after connection is stopped (action %d)",
433  pcmk__node_name(node), stonith_op->id, stop->id);
434  } else {
435  /* Not sure why we're fencing, but everything must already be
436  * cleanly stopped.
437  */
438  crm_info("Implying guest %s is down (action %d) ",
439  pcmk__node_name(node), stonith_op->id);
440  }
441  }
442 
443  // Order/imply other actions relative to pseudo-fence as with real fence
444  pcmk__order_vs_fence(stonith_op, node->priv->scheduler);
445 }
446 
456 bool
458 {
459  const char *unfenced = pcmk__node_attr(node, CRM_ATTR_UNFENCED, NULL,
461 
462  return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
463 }
464 
472 void
473 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
474 {
475  pcmk_node_t *node = (pcmk_node_t *) data;
476  pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
477 
478  pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL,
479  false, rsc->priv->scheduler);
480 
481  crm_debug("Ordering any stops of %s before %s, and any starts after",
482  rsc->id, unfence->uuid);
483 
484  /*
485  * It would be more efficient to order clone resources once,
486  * rather than order each instance, but ordering the instance
487  * allows us to avoid unnecessary dependencies that might conflict
488  * with user constraints.
489  *
490  * @TODO: This constraint can still produce a transition loop if the
491  * resource has a stop scheduled on the node being unfenced, and
492  * there is a user ordering constraint to start some other resource
493  * (which will be ordered after the unfence) before stopping this
494  * resource. An example is "start some slow-starting cloned service
495  * before stopping an associated virtual IP that may be moving to
496  * it":
497  * stop this -> unfencing -> start that -> stop this
498  */
499  pcmk__new_ordering(rsc, stop_key(rsc), NULL,
500  NULL, strdup(unfence->uuid), unfence,
502  rsc->priv->scheduler);
503 
504  pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
505  rsc, start_key(rsc), NULL,
508  rsc->priv->scheduler);
509 }
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:213
Relation applies only if actions are on same node.
A dumping ground.
void trigger_unfencing(pcmk_resource_t *rsc, pcmk_node_t *node, const char *reason, pcmk_action_t *dependency, pcmk_scheduler_t *scheduler)
Definition: utils.c:610
#define crm_notice(fmt, args...)
Definition: logging.h:365
'then' is runnable (and migratable) only if 'first' is runnable
void pcmk__fence_guest(pcmk_node_t *node)
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
char data[0]
Definition: cpg.c:58
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:1017
pcmk_resource_t * parent
#define pcmk__rsc_trace(rsc, fmt, args...)
#define PCMK_ACTION_ON
Definition: actions.h:55
#define pcmk__rsc_info(rsc, fmt, args...)
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1245
pcmk__scheduler_private_t * priv
Definition: scheduler.h:99
uint64_t flags
Definition: scheduler.h:89
If 'first' is required, 'then' action for instance on same node is.
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1517
Ordering applies even if 'first' runs on guest node created by 'then'.
const char * action
Definition: pcmk_fence.c:32
#define pcmk__rsc_debug(rsc, fmt, args...)
pcmk__action_relation_flags
pcmk__node_private_t * priv
Definition: nodes.h:85
#define CRM_ATTR_UNFENCED
Definition: crm.h:99
#define PCMK_ACTION_DEMOTE
Definition: actions.h:40
#define PCMK_ACTION_REBOOT
Definition: actions.h:59
#define crm_debug(fmt, args...)
Definition: logging.h:370
pcmk_scheduler_t * scheduler
Actions are ordered (optionally, if no other flags are set)
void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
#define stop_key(rsc)
Definition: internal.h:196
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:80
#define PCMK_ACTION_START
Definition: actions.h:63
pcmk__resource_private_t * priv
Definition: resources.h:61
#define PCMK_ACTION_STOP
Definition: actions.h:66
#define PCMK_ACTION_STONITH
Definition: actions.h:65
pcmk_scheduler_t * scheduler
gboolean order_actions(pcmk_action_t *first, pcmk_action_t *then, uint32_t flags)
Definition: utils.c:465
#define pcmk__assert(expr)
const char * target
Definition: pcmk_fence.c:31
void pcmk__order_vs_fence(pcmk_action_t *stonith_op, pcmk_scheduler_t *scheduler)
pcmk_node_t * node
Cluster status and scheduling.
void pe__order_notifs_after_fencing(const pcmk_action_t *action, pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
Definition: pe_notif.c:1000
pcmk_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pcmk_node_t *on_node)
Definition: pe_actions.c:1416
pcmk_scheduler_t * scheduler
pcmk_resource_t * remote
pcmk_resource_t * launcher
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:114
#define pcmk__set_action_flags(action, flags_to_set)
unsigned long long flags
Definition: resources.h:69
#define PCMK_ACTION_OFF
Definition: actions.h:54
#define start_key(rsc)
Definition: internal.h:198
const char * parent
Definition: cib.c:27
#define crm_info(fmt, args...)
Definition: logging.h:367
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
bool pcmk__node_unfenced(const pcmk_node_t *node)