pacemaker  2.1.6-802a72226b
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 rsc_is_known_on(const pe_resource_t *rsc, const pe_node_t *node)
30 {
31  if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
32  return TRUE;
33 
34  } else if ((rsc->variant == pe_native)
35  && pe_rsc_is_anon_clone(rsc->parent)
36  && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
37  /* We check only the parent, not the uber-parent, because we cannot
38  * assume that the resource is known if it is in an anonymously cloned
39  * group (which may be only partially known).
40  */
41  return TRUE;
42  }
43  return FALSE;
44 }
45 
53 static void
54 order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
55 {
57  GList *gIter = NULL;
58 
59  CRM_CHECK(stonith_op && stonith_op->node, return);
60  target = stonith_op->node;
61 
62  for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
63  pe_action_t *action = (pe_action_t *) gIter->data;
64 
65  switch (action->needs) {
66  case rsc_req_nothing:
67  // Anything other than start or promote requires nothing
68  break;
69 
70  case rsc_req_stonith:
72  break;
73 
74  case rsc_req_quorum:
75  if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
76  && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
77  && !rsc_is_known_on(rsc, target)) {
78 
79  /* If we don't know the status of the resource on the node
80  * we're about to shoot, we have to assume it may be active
81  * there. Order the resource start after the fencing. This
82  * is analogous to waiting for all the probes for a resource
83  * to complete before starting it.
84  *
85  * The most likely explanation is that the DC died and took
86  * its status with it.
87  */
88  pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
89  pe__node_name(target));
90  order_actions(stonith_op, action,
92  }
93  break;
94  }
95  }
96 }
97 
105 static void
106 order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
107 {
108  GList *gIter = NULL;
109  GList *action_list = NULL;
110  bool order_implicit = false;
111 
112  pe_resource_t *top = uber_parent(rsc);
113  pe_action_t *parent_stop = NULL;
114  pe_node_t *target;
115 
116  CRM_CHECK(stonith_op && stonith_op->node, return);
117  target = stonith_op->node;
118 
119  /* Get a list of stop actions potentially implied by the fencing */
120  action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE);
121 
122  /* If resource requires fencing, implicit actions must occur after fencing.
123  *
124  * Implied stops and demotes of resources running on guest nodes are always
125  * ordered after fencing, even if the resource does not require fencing,
126  * because guest node "fencing" is actually just a resource stop.
127  */
130 
131  order_implicit = true;
132  }
133 
134  if (action_list && order_implicit) {
135  parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
136  }
137 
138  for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
139  pe_action_t *action = (pe_action_t *) gIter->data;
140 
141  // The stop would never complete, so convert it into a pseudo-action.
143 
144  if (order_implicit) {
146 
147  /* Order the stonith before the parent stop (if any).
148  *
149  * Also order the stonith before the resource stop, unless the
150  * resource is inside a bundle -- that would cause a graph loop.
151  * We can rely on the parent stop's ordering instead.
152  *
153  * User constraints must not order a resource in a guest node
154  * relative to the guest node container resource. The
155  * pe_order_preserve flag marks constraints as generated by the
156  * cluster and thus immune to that check (and is irrelevant if
157  * target is not a guest).
158  */
159  if (!pe_rsc_is_bundled(rsc)) {
160  order_actions(stonith_op, action, pe_order_preserve);
161  }
162  order_actions(stonith_op, parent_stop, pe_order_preserve);
163  }
164 
165  if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
166  crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
167  rsc->id, (order_implicit? "after" : "because"),
168  pe__node_name(target));
169  } else {
170  crm_info("%s is implicit %s %s is fenced",
171  action->uuid, (order_implicit? "after" : "because"),
172  pe__node_name(target));
173  }
174 
175  if (pcmk_is_set(rsc->flags, pe_rsc_notify)) {
176  pe__order_notifs_after_fencing(action, rsc, stonith_op);
177  }
178 
179 #if 0
180  /* It might be a good idea to stop healthy resources on a node about to
181  * be fenced, when possible.
182  *
183  * However, fencing must be done before a failed resource's
184  * (pseudo-)stop action, so that could create a loop. For example, given
185  * a group of A and B running on node N with a failed stop of B:
186  *
187  * fence N -> stop B (pseudo-op) -> stop A -> fence N
188  *
189  * The block below creates the stop A -> fence N ordering and therefore
190  * must (at least for now) be disabled. Instead, run the block above and
191  * treat all resources on N as B would be (i.e., as a pseudo-op after
192  * the fencing).
193  *
194  * @TODO Maybe break the "A requires B" dependency in
195  * pcmk__update_action_for_orderings() and use this block for healthy
196  * resources instead of the above.
197  */
198  crm_info("Moving healthy resource %s off %s before fencing",
199  rsc->id, pe__node_name(node));
200  pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
201  strdup(CRM_OP_FENCE), stonith_op,
202  pe_order_optional, rsc->cluster);
203 #endif
204  }
205 
206  g_list_free(action_list);
207 
208  /* Get a list of demote actions potentially implied by the fencing */
209  action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
210 
211  for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
212  pe_action_t *action = (pe_action_t *) gIter->data;
213 
214  if (!(action->node->details->online) || action->node->details->unclean
215  || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
216 
217  if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
218  pe_rsc_info(rsc,
219  "Demote of failed resource %s is implicit after %s is fenced",
220  rsc->id, pe__node_name(target));
221  } else {
222  pe_rsc_info(rsc, "%s is implicit after %s is fenced",
223  action->uuid, pe__node_name(target));
224  }
225 
226  /* The demote would never complete and is now implied by the
227  * fencing, so convert it into a pseudo-action.
228  */
230 
231  if (pe_rsc_is_bundled(rsc)) {
232  // Do nothing, let recovery be ordered after parent's implied stop
233 
234  } else if (order_implicit) {
236  }
237  }
238  }
239 
240  g_list_free(action_list);
241 }
242 
250 static void
251 rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op)
252 {
253  if (rsc->children) {
254  GList *gIter = NULL;
255 
256  for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
257  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
258 
259  rsc_stonith_ordering(child_rsc, stonith_op);
260  }
261 
262  } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
263  pe_rsc_trace(rsc,
264  "Skipping fencing constraints for unmanaged resource: %s",
265  rsc->id);
266 
267  } else {
268  order_start_vs_fencing(rsc, stonith_op);
269  order_stop_vs_fencing(rsc, stonith_op);
270  }
271 }
272 
284 void
286 {
287  CRM_CHECK(stonith_op && data_set, return);
288  for (GList *r = data_set->resources; r != NULL; r = r->next) {
289  rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op);
290  }
291 }
292 
302 void
304  pe_action_t *action, enum pe_ordering order)
305 {
306  /* When unfencing is in use, we order unfence actions before any probe or
307  * start of resources that require unfencing, and also of fence devices.
308  *
309  * This might seem to violate the principle that fence devices require
310  * only quorum. However, fence agents that unfence often don't have enough
311  * information to even probe or start unless the node is first unfenced.
312  */
316 
317  /* Start with an optional ordering. Requiring unfencing would result in
318  * the node being unfenced, and all its resources being stopped,
319  * whenever a new resource is added -- which would be highly suboptimal.
320  */
321  pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
322  node->details->data_set);
323 
324  order_actions(unfence, action, order);
325 
326  if (!pcmk__node_unfenced(node)) {
327  // But unfencing is required if it has never been done
328  char *reason = crm_strdup_printf("required by %s %s",
329  rsc->id, action->task);
330 
331  trigger_unfencing(NULL, node, reason, NULL,
332  node->details->data_set);
333  free(reason);
334  }
335  }
336 }
337 
344 void
346 {
347  pe_resource_t *container = NULL;
348  pe_action_t *stop = NULL;
349  pe_action_t *stonith_op = NULL;
350 
351  /* The fence action is just a label; we don't do anything differently for
352  * off vs. reboot. We specify it explicitly, rather than let it default to
353  * cluster's default action, because we are not _initiating_ fencing -- we
354  * are creating a pseudo-event to describe fencing that is already occurring
355  * by other means (container recovery).
356  */
357  const char *fence_action = "off";
358 
359  CRM_ASSERT(node != NULL);
360 
361  /* Check whether guest's container resource has any explicit stop or
362  * start (the stop may be implied by fencing of the guest's host).
363  */
364  container = node->details->remote_rsc->container;
365  if (container) {
366  stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP,
367  NULL);
368 
369  if (find_first_action(container->actions, NULL, CRMD_ACTION_START,
370  NULL)) {
371  fence_action = "reboot";
372  }
373  }
374 
375  /* Create a fence pseudo-event, so we have an event to order actions
376  * against, and the controller can always detect it.
377  */
378  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
379  FALSE, node->details->data_set);
381 
382  /* We want to imply stops/demotes after the guest is stopped, not wait until
383  * it is restarted, so we always order pseudo-fencing after stop, not start
384  * (even though start might be closer to what is done for a real reboot).
385  */
386  if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
387  pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
388  NULL, FALSE,
389  node->details->data_set);
390 
391  crm_info("Implying guest %s is down (action %d) after %s fencing",
392  pe__node_name(node), stonith_op->id,
393  pe__node_name(stop->node));
394  order_actions(parent_stonith_op, stonith_op,
396 
397  } else if (stop) {
398  order_actions(stop, stonith_op,
400  crm_info("Implying guest %s is down (action %d) "
401  "after container %s is stopped (action %d)",
402  pe__node_name(node), stonith_op->id,
403  container->id, stop->id);
404  } else {
405  /* If we're fencing the guest node but there's no stop for the guest
406  * resource, we must think the guest is already stopped. However, we may
407  * think so because its resource history was just cleaned. To avoid
408  * unnecessarily considering the guest node down if it's really up,
409  * order the pseudo-fencing after any stop of the connection resource,
410  * which will be ordered after any container (re-)probe.
411  */
412  stop = find_first_action(node->details->remote_rsc->actions, NULL,
413  RSC_STOP, NULL);
414 
415  if (stop) {
416  order_actions(stop, stonith_op, pe_order_optional);
417  crm_info("Implying guest %s is down (action %d) "
418  "after connection is stopped (action %d)",
419  pe__node_name(node), stonith_op->id, stop->id);
420  } else {
421  /* Not sure why we're fencing, but everything must already be
422  * cleanly stopped.
423  */
424  crm_info("Implying guest %s is down (action %d) ",
425  pe__node_name(node), stonith_op->id);
426  }
427  }
428 
429  // Order/imply other actions relative to pseudo-fence as with real fence
430  pcmk__order_vs_fence(stonith_op, node->details->data_set);
431 }
432 
442 bool
444 {
445  const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
446 
447  return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
448 }
449 
457 void
458 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
459 {
460  pe_node_t *node = (pe_node_t *) data;
461  pe_resource_t *rsc = (pe_resource_t *) user_data;
462 
463  pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false,
464  rsc->cluster);
465 
466  crm_debug("Ordering any stops of %s before %s, and any starts after",
467  rsc->id, unfence->uuid);
468 
469  /*
470  * It would be more efficient to order clone resources once,
471  * rather than order each instance, but ordering the instance
472  * allows us to avoid unnecessary dependencies that might conflict
473  * with user constraints.
474  *
475  * @TODO: This constraint can still produce a transition loop if the
476  * resource has a stop scheduled on the node being unfenced, and
477  * there is a user ordering constraint to start some other resource
478  * (which will be ordered after the unfence) before stopping this
479  * resource. An example is "start some slow-starting cloned service
480  * before stopping an associated virtual IP that may be moving to
481  * it":
482  * stop this -> unfencing -> start that -> stop this
483  */
484  pcmk__new_ordering(rsc, stop_key(rsc), NULL,
485  NULL, strdup(unfence->uuid), unfence,
487  rsc->cluster);
488 
489  pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
490  rsc, start_key(rsc), NULL,
492  rsc->cluster);
493 }
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:235
#define RSC_STOP
Definition: crm.h:202
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:379
GHashTable * known_on
Definition: pe_types.h:399
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:49
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
char data[0]
Definition: cpg.c:55
void pcmk__fence_guest(pe_node_t *node)
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:89
#define CRM_OP_FENCE
Definition: crm.h:144
pe_resource_t * container
Definition: pe_types.h:412
GList * children
Definition: pe_types.h:409
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
pe_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node)
Definition: pe_actions.c:1298
pe_resource_t * remote_rsc
Definition: pe_types.h:253
#define pe_rsc_notify
Definition: pe_types.h:277
#define RSC_START
Definition: crm.h:199
void pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order)
pe_working_set_t * data_set
Cluster that this node is part of.
Definition: pe_types.h:261
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: pe_types.h:181
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:616
#define CRMD_ACTION_START
Definition: crm.h:174
#define CRM_ATTR_UNFENCED
Definition: crm.h:120
#define CRMD_ACTION_STOP
Definition: crm.h:177
#define pe_rsc_failed
Definition: pe_types.h:292
#define crm_debug(fmt, args...)
Definition: logging.h:382
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:922
const char * pe_node_attribute_raw(const pe_node_t *node, const char *name)
Definition: common.c:558
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
#define stop_key(rsc)
Definition: internal.h:405
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:121
struct pe_node_shared_s * details
Definition: pe_types.h:268
pe_node_t * node
Definition: pe_types.h:434
#define pe_rsc_needs_fencing
Definition: pe_types.h:312
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:488
unsigned long long flags
Definition: pe_types.h:373
pe_working_set_t * data_set
GList * actions
Definition: pe_types.h:391
G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *data_set)
char * uuid
Definition: pe_types.h:438
enum pe_obj_types variant
Definition: pe_types.h:356
const char * id
Definition: pe_types.h:231
#define pe_rsc_fence_device
Definition: pe_types.h:279
const char * target
Definition: pcmk_fence.c:29
Cluster status and scheduling.
#define CRM_ASSERT(expr)
Definition: results.h:42
#define pe_rsc_needs_unfencing
Definition: pe_types.h:313
enum pe_action_flags flags
Definition: pe_types.h:442
pe_working_set_t * cluster
Definition: pe_types.h:353
#define pe_flag_enable_unfencing
Definition: pe_types.h:117
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:50
#define start_key(rsc)
Definition: internal.h:411
unsigned long long flags
Definition: pe_types.h:169
bool pcmk__node_unfenced(const pe_node_t *node)
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1400
void pe__order_notifs_after_fencing(const pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op)
Definition: pe_notif.c:981
#define crm_info(fmt, args...)
Definition: logging.h:380
#define pe_rsc_managed
Definition: pe_types.h:273
pe_ordering
Definition: pe_types.h:506
pe_resource_t * parent
Definition: pe_types.h:354
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: pe_actions.c:1084
#define RSC_DEMOTE
Definition: crm.h:207
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:48
char * id
Definition: pe_types.h:347
GHashTable * allowed_nodes
Definition: pe_types.h:400