pacemaker  2.1.5-b7adf64e51
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2022 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 rsc_is_known_on(pe_resource_t *rsc, const pe_node_t *node)
30 {
31  if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
32  return TRUE;
33 
34  } else if ((rsc->variant == pe_native)
35  && pe_rsc_is_anon_clone(rsc->parent)
36  && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
37  /* We check only the parent, not the uber-parent, because we cannot
38  * assume that the resource is known if it is in an anonymously cloned
39  * group (which may be only partially known).
40  */
41  return TRUE;
42  }
43  return FALSE;
44 }
45 
/*
 * Order a resource's actions relative to a fencing action.
 *
 * Walks every action in rsc->actions and dispatches on action->needs. For
 * actions that need only quorum, a start is ordered after stonith_op when the
 * fencing target is among the resource's allowed nodes and the resource is
 * not known on that target (see rsc_is_known_on()).
 *
 * Returns early (via CRM_CHECK) if stonith_op or stonith_op->node is NULL.
 *
 * NOTE(review): this listing is an extract with gaps. Listing lines 56, 58,
 * 73 and 93 are absent: the end of the parameter list, the declaration of
 * `target`, the body of the rsc_req_stonith case, and the final argument of
 * the order_actions() call. Restore them from the upstream file before
 * relying on this text.
 */
54 static void
55 order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op,
57 {
59  GList *gIter = NULL;
60 
61  CRM_CHECK(stonith_op && stonith_op->node, return);
/* The node being fenced; its declaration is not visible in this extract */
62  target = stonith_op->node;
63 
64  for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
65  pe_action_t *action = (pe_action_t *) gIter->data;
66 
67  switch (action->needs) {
68  case rsc_req_nothing:
69  // Anything other than start or promote requires nothing
70  break;
71 
72  case rsc_req_stonith:
/* NOTE(review): the statement for this case (listing line 73) is missing */
74  break;
75 
76  case rsc_req_quorum:
77  if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
78  && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
79  && !rsc_is_known_on(rsc, target)) {
80 
81  /* If we don't know the status of the resource on the node
82  * we're about to shoot, we have to assume it may be active
83  * there. Order the resource start after the fencing. This
84  * is analogous to waiting for all the probes for a resource
85  * to complete before starting it.
86  *
87  * The most likely explanation is that the DC died and took
88  * its status with it.
89  */
90  pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
91  pe__node_name(target));
92  order_actions(stonith_op, action,
/* NOTE(review): the ordering-flag argument (listing line 93) is missing */
94  }
95  break;
96  }
97  }
98 }
99 
/*
 * Treat a resource's stop and demote actions on a fencing target as implied
 * by the fencing, and order them relative to it.
 *
 * Visible behavior, in order:
 *  - collect the resource's stop actions for the target (stonith_op->node);
 *  - when order_implicit is set, order stonith_op before each stop (unless
 *    the resource is bundled) and before the first stop found among the
 *    top-level ancestor's actions, using pe_order_preserve;
 *  - log each implied stop, and order notifications after fencing when the
 *    resource has pe_rsc_notify set;
 *  - repeat for demote actions whose node is offline or unclean, or whose
 *    resource has failed.
 *
 * Returns early (via CRM_CHECK) if stonith_op or stonith_op->node is NULL.
 *
 * NOTE(review): this listing is an extract with gaps. Listing lines 110,
 * 132-133, 146, 149, 206, 233 and 239 are absent, so the end of the parameter
 * list, the condition that sets order_implicit, and several statements in the
 * loops are not visible here. Restore them from the upstream file.
 */
108 static void
109 order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op,
111 {
112  GList *gIter = NULL;
113  GList *action_list = NULL;
114  bool order_implicit = false;
115 
116  pe_resource_t *top = uber_parent(rsc);
117  pe_action_t *parent_stop = NULL;
118  pe_node_t *target;
119 
120  CRM_CHECK(stonith_op && stonith_op->node, return);
121  target = stonith_op->node;
122 
123  /* Get a list of stop actions potentially implied by the fencing */
124  action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE);
125 
126  /* If resource requires fencing, implicit actions must occur after fencing.
127  *
128  * Implied stops and demotes of resources running on guest nodes are always
129  * ordered after fencing, even if the resource does not require fencing,
130  * because guest node "fencing" is actually just a resource stop.
131  */
/* NOTE(review): the `if (...) {` guarding the assignment below (listing lines
 * 132-133) is missing from this extract.
 */
134 
135  order_implicit = true;
136  }
137 
/* Only look up the parent's stop when there is something to order against */
138  if (action_list && order_implicit) {
139  parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
140  }
141 
142  for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
143  pe_action_t *action = (pe_action_t *) gIter->data;
144 
145  // The stop would never complete, so convert it into a pseudo-action.
/* NOTE(review): the statement performing that conversion (listing line 146)
 * is missing from this extract.
 */
147 
148  if (order_implicit) {
/* NOTE(review): listing line 149 is missing from this extract */
150 
151  /* Order the stonith before the parent stop (if any).
152  *
153  * Also order the stonith before the resource stop, unless the
154  * resource is inside a bundle -- that would cause a graph loop.
155  * We can rely on the parent stop's ordering instead.
156  *
157  * User constraints must not order a resource in a guest node
158  * relative to the guest node container resource. The
159  * pe_order_preserve flag marks constraints as generated by the
160  * cluster and thus immune to that check (and is irrelevant if
161  * target is not a guest).
162  */
163  if (!pe_rsc_is_bundled(rsc)) {
164  order_actions(stonith_op, action, pe_order_preserve);
165  }
166  order_actions(stonith_op, parent_stop, pe_order_preserve);
167  }
168 
/* Failed resources are logged at notice level by resource ID; all others at
 * info level by action UUID.
 */
169  if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
170  crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
171  rsc->id, (order_implicit? "after" : "because"),
172  pe__node_name(target));
173  } else {
174  crm_info("%s is implicit %s %s is fenced",
175  action->uuid, (order_implicit? "after" : "because"),
176  pe__node_name(target));
177  }
178 
179  if (pcmk_is_set(rsc->flags, pe_rsc_notify)) {
180  pe__order_notifs_after_fencing(action, rsc, stonith_op);
181  }
182 
/* The block below is compiled out. NOTE(review): it refers to `node`, which
 * is not declared in the visible part of this function, and its final call is
 * truncated (listing line 206 is missing).
 */
183 #if 0
184  /* It might be a good idea to stop healthy resources on a node about to
185  * be fenced, when possible.
186  *
187  * However, fencing must be done before a failed resource's
188  * (pseudo-)stop action, so that could create a loop. For example, given
189  * a group of A and B running on node N with a failed stop of B:
190  *
191  * fence N -> stop B (pseudo-op) -> stop A -> fence N
192  *
193  * The block below creates the stop A -> fence N ordering and therefore
194  * must (at least for now) be disabled. Instead, run the block above and
195  * treat all resources on N as B would be (i.e., as a pseudo-op after
196  * the fencing).
197  *
198  * @TODO Maybe break the "A requires B" dependency in
199  * pcmk__update_action_for_orderings() and use this block for healthy
200  * resources instead of the above.
201  */
202  crm_info("Moving healthy resource %s off %s before fencing",
203  rsc->id, pe__node_name(node));
204  pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
205  strdup(CRM_OP_FENCE), stonith_op,
207 #endif
208  }
209 
/* Frees only the list cells; the actions themselves are not freed here */
210  g_list_free(action_list);
211 
212  /* Get a list of demote actions potentially implied by the fencing */
213  action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
214 
215  for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
216  pe_action_t *action = (pe_action_t *) gIter->data;
217 
218  if (!(action->node->details->online) || action->node->details->unclean
219  || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
220 
221  if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
222  pe_rsc_info(rsc,
223  "Demote of failed resource %s is implicit after %s is fenced",
224  rsc->id, pe__node_name(target));
225  } else {
226  pe_rsc_info(rsc, "%s is implicit after %s is fenced",
227  action->uuid, pe__node_name(target));
228  }
229 
230  /* The demote would never complete and is now implied by the
231  * fencing, so convert it into a pseudo-action.
232  */
/* NOTE(review): the statement performing that conversion (listing line 233)
 * is missing from this extract.
 */
234 
235  if (pe_rsc_is_bundled(rsc)) {
236  // Do nothing, let recovery be ordered after parent's implied stop
237 
238  } else if (order_implicit) {
/* NOTE(review): the body of this branch (listing line 239) is missing */
240  }
241  }
242  }
243 
244  g_list_free(action_list);
245 }
246 
255 static void
256 rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op,
258 {
259  if (rsc->children) {
260  GList *gIter = NULL;
261 
262  for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
263  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
264 
265  rsc_stonith_ordering(child_rsc, stonith_op, data_set);
266  }
267 
268  } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
269  pe_rsc_trace(rsc,
270  "Skipping fencing constraints for unmanaged resource: %s",
271  rsc->id);
272 
273  } else {
274  order_start_vs_fencing(rsc, stonith_op, data_set);
275  order_stop_vs_fencing(rsc, stonith_op, data_set);
276  }
277 }
278 
290 void
292 {
293  CRM_CHECK(stonith_op && data_set, return);
294  for (GList *r = data_set->resources; r != NULL; r = r->next) {
295  rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op, data_set);
296  }
297 }
298 
/*
 * Order an action after the unfencing ("on" fence op) of a node.
 *
 * Within the guarded section, an optional "on" fence action is obtained for
 * the node via pe_fence_op(), `action` is ordered after it with the
 * caller-supplied ordering flags, and -- if pcmk__node_unfenced() reports the
 * node has not been unfenced -- trigger_unfencing() is called with a reason
 * string naming the resource and the action's task.
 *
 * NOTE(review): this listing is an extract with gaps. Listing line 309 (the
 * function name and leading parameters) and lines 319-321 (the `if` condition
 * that guards the body) are absent. The listing's cross-reference index gives
 * the declaration as:
 *   void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
 *                               pe_action_t *action, enum pe_ordering order)
 * Restore the missing lines from the upstream file.
 */
308 void
310  enum pe_ordering order)
311 {
312  /* When unfencing is in use, we order unfence actions before any probe or
313  * start of resources that require unfencing, and also of fence devices.
314  *
315  * This might seem to violate the principle that fence devices require
316  * only quorum. However, fence agents that unfence often don't have enough
317  * information to even probe or start unless the node is first unfenced.
318  */
/* NOTE(review): the guarding `if (...) {` (listing lines 319-321) is missing */
322 
323  /* Start with an optional ordering. Requiring unfencing would result in
324  * the node being unfenced, and all its resources being stopped,
325  * whenever a new resource is added -- which would be highly suboptimal.
326  */
327  pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
328  rsc->cluster);
329 
330  order_actions(unfence, action, order);
331 
332  if (!pcmk__node_unfenced(node)) {
333  // But unfencing is required if it has never been done
334  char *reason = crm_strdup_printf("required by %s %s",
335  rsc->id, action->task);
336 
337  trigger_unfencing(NULL, node, reason, NULL, rsc->cluster);
/* reason was allocated by crm_strdup_printf() and is released here */
338  free(reason);
339  }
340  }
341 }
342 
/*
 * Create a fence pseudo-event for a guest node and order it relative to the
 * stop of the guest's container or connection resource.
 *
 * Visible behavior, in order:
 *  - label the fencing "off", or "reboot" if the container resource has a
 *    start action;
 *  - obtain the fence op for the node via pe_fence_op() with the reason
 *    "guest is unclean";
 *  - if the container's stop is a pseudo-action, order the guest's fence op
 *    after the fence op of the stop's node; else if a container stop exists,
 *    order after that stop; else order (optionally) after any stop of the
 *    connection resource, or order nothing if there is none;
 *  - finally call pcmk__order_vs_fence() so other actions are ordered against
 *    the pseudo-fence as they would be for real fencing.
 *
 * Asserts that node is non-NULL.
 *
 * NOTE(review): this listing is an extract with gaps. Listing lines 350, 385,
 * 400 and 404 are absent: the signature (the cross-reference index gives
 * `void pcmk__fence_guest(pe_node_t *node)`), a statement after the fence op
 * is created, and the ordering-flag arguments of two order_actions() calls.
 * Restore them from the upstream file.
 */
349 void
351 {
352  pe_resource_t *container = NULL;
353  pe_action_t *stop = NULL;
354  pe_action_t *stonith_op = NULL;
355 
356  /* The fence action is just a label; we don't do anything differently for
357  * off vs. reboot. We specify it explicitly, rather than let it default to
358  * cluster's default action, because we are not _initiating_ fencing -- we
359  * are creating a pseudo-event to describe fencing that is already occurring
360  * by other means (container recovery).
361  */
362  const char *fence_action = "off";
363 
364  CRM_ASSERT(node != NULL);
365 
366  /* Check whether guest's container resource has any explicit stop or
367  * start (the stop may be implied by fencing of the guest's host).
368  */
/* NOTE(review): remote_rsc is dereferenced without a NULL check; presumably
 * callers pass only guest nodes -- confirm.
 */
369  container = node->details->remote_rsc->container;
370  if (container) {
371  stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP,
372  NULL);
373 
374  if (find_first_action(container->actions, NULL, CRMD_ACTION_START,
375  NULL)) {
376  fence_action = "reboot";
377  }
378  }
379 
380  /* Create a fence pseudo-event, so we have an event to order actions
381  * against, and the controller can always detect it.
382  */
383  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
384  FALSE, node->details->data_set);
/* NOTE(review): the statement at listing line 385 is missing from this
 * extract.
 */
386 
387  /* We want to imply stops/demotes after the guest is stopped, not wait until
388  * it is restarted, so we always order pseudo-fencing after stop, not start
389  * (even though start might be closer to what is done for a real reboot).
390  */
391  if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
392  pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
393  NULL, FALSE,
394  node->details->data_set);
395 
396  crm_info("Implying guest %s is down (action %d) after %s fencing",
397  pe__node_name(node), stonith_op->id,
398  pe__node_name(stop->node));
399  order_actions(parent_stonith_op, stonith_op,
/* NOTE(review): the ordering-flag argument (listing line 400) is missing */
401 
402  } else if (stop) {
403  order_actions(stop, stonith_op,
/* NOTE(review): the ordering-flag argument (listing line 404) is missing */
405  crm_info("Implying guest %s is down (action %d) "
406  "after container %s is stopped (action %d)",
407  pe__node_name(node), stonith_op->id,
408  container->id, stop->id);
409  } else {
410  /* If we're fencing the guest node but there's no stop for the guest
411  * resource, we must think the guest is already stopped. However, we may
412  * think so because its resource history was just cleaned. To avoid
413  * unnecessarily considering the guest node down if it's really up,
414  * order the pseudo-fencing after any stop of the connection resource,
415  * which will be ordered after any container (re-)probe.
416  */
417  stop = find_first_action(node->details->remote_rsc->actions, NULL,
418  RSC_STOP, NULL);
419 
420  if (stop) {
421  order_actions(stop, stonith_op, pe_order_optional);
422  crm_info("Implying guest %s is down (action %d) "
423  "after connection is stopped (action %d)",
424  pe__node_name(node), stonith_op->id, stop->id);
425  } else {
426  /* Not sure why we're fencing, but everything must already be
427  * cleanly stopped.
428  */
429  crm_info("Implying guest %s is down (action %d) ",
430  pe__node_name(node), stonith_op->id);
431  }
432  }
433 
434  // Order/imply other actions relative to pseudo-fence as with real fence
435  pcmk__order_vs_fence(stonith_op, node->details->data_set);
436 }
437 
447 bool
449 {
450  const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
451 
452  return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
453 }
454 
/*
 * Order a resource's stop before, and its start after, a node's unfencing.
 *
 * The gpointer signature suggests this is used as a list-iteration callback
 * (presumably via g_list_foreach() -- confirm against the caller).
 *
 * data:      the node to be unfenced (cast to pe_node_t *)
 * user_data: the resource to order (cast to pe_resource_t *)
 *
 * An optional "on" fence action is obtained for the node via pe_fence_op(),
 * then two orderings are created with pcmk__new_ordering(): the resource's
 * stop before the unfence action, and the unfence action before the
 * resource's start.
 *
 * NOTE(review): this listing is an extract with gaps. Listing lines 491 and
 * 496 (the ordering-flag argument of each pcmk__new_ordering() call) are
 * absent. Restore them from the upstream file.
 */
462 void
463 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
464 {
465  pe_node_t *node = (pe_node_t *) data;
466  pe_resource_t *rsc = (pe_resource_t *) user_data;
467 
468  pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false,
469  rsc->cluster);
470 
471  crm_debug("Ordering any stops of %s before %s, and any starts after",
472  rsc->id, unfence->uuid);
473 
474  /*
475  * It would be more efficient to order clone resources once,
476  * rather than order each instance, but ordering the instance
477  * allows us to avoid unnecessary dependencies that might conflict
478  * with user constraints.
479  *
480  * @TODO: This constraint can still produce a transition loop if the
481  * resource has a stop scheduled on the node being unfenced, and
482  * there is a user ordering constraint to start some other resource
483  * (which will be ordered after the unfence) before stopping this
484  * resource. An example is "start some slow-starting cloned service
485  * before stopping an associated virtual IP that may be moving to
486  * it":
487  * stop this -> unfencing -> start that -> stop this
488  */
/* First ordering: stop of rsc, then the unfence action */
489  pcmk__new_ordering(rsc, stop_key(rsc), NULL,
490  NULL, strdup(unfence->uuid), unfence,
/* NOTE(review): the flags argument (listing line 491) is missing */
492  rsc->cluster);
493 
/* Second ordering: the unfence action, then start of rsc */
494  pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
495  rsc, start_key(rsc), NULL,
/* NOTE(review): the flags argument (listing line 496) is missing */
497  rsc->cluster);
498 }
bool pcmk__node_unfenced(pe_node_t *node)
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:227
#define RSC_STOP
Definition: crm.h:202
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:361
GHashTable * known_on
Definition: pe_types.h:374
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:46
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
char data[0]
Definition: cpg.c:55
void pcmk__fence_guest(pe_node_t *node)
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:86
#define CRM_OP_FENCE
Definition: crm.h:144
pe_resource_t * container
Definition: pe_types.h:387
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order)
GList * children
Definition: pe_types.h:384
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
pe_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node)
Definition: pe_actions.c:1296
pe_resource_t * remote_rsc
Definition: pe_types.h:237
#define pe_rsc_notify
Definition: pe_types.h:261
#define RSC_START
Definition: crm.h:199
pe_working_set_t * data_set
Cluster that this node is part of.
Definition: pe_types.h:245
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: pe_types.h:165
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:604
#define CRMD_ACTION_START
Definition: crm.h:174
#define CRM_ATTR_UNFENCED
Definition: crm.h:120
#define CRMD_ACTION_STOP
Definition: crm.h:177
#define pe_rsc_failed
Definition: pe_types.h:276
#define crm_debug(fmt, args...)
Definition: logging.h:364
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:912
const char * pe_node_attribute_raw(const pe_node_t *node, const char *name)
Definition: common.c:562
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
void pe__order_notifs_after_fencing(pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op)
Definition: pe_notif.c:977
#define stop_key(rsc)
Definition: internal.h:414
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:121
struct pe_node_shared_s * details
Definition: pe_types.h:252
pe_node_t * node
Definition: pe_types.h:407
#define pe_rsc_needs_fencing
Definition: pe_types.h:294
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:474
unsigned long long flags
Definition: pe_types.h:355
pe_working_set_t * data_set
GList * actions
Definition: pe_types.h:366
G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *data_set)
char * uuid
Definition: pe_types.h:411
enum pe_obj_types variant
Definition: pe_types.h:338
const char * id
Definition: pe_types.h:215
#define pe_rsc_fence_device
Definition: pe_types.h:263
const char * target
Definition: pcmk_fence.c:29
Cluster status and scheduling.
#define CRM_ASSERT(expr)
Definition: results.h:42
#define pe_rsc_needs_unfencing
Definition: pe_types.h:295
enum pe_action_flags flags
Definition: pe_types.h:415
pe_working_set_t * cluster
Definition: pe_types.h:335
#define pe_flag_enable_unfencing
Definition: pe_types.h:101
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:47
#define start_key(rsc)
Definition: internal.h:420
unsigned long long flags
Definition: pe_types.h:153
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1398
#define crm_info(fmt, args...)
Definition: logging.h:362
#define pe_rsc_managed
Definition: pe_types.h:257
pe_ordering
Definition: pe_types.h:479
pe_resource_t * parent
Definition: pe_types.h:336
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: pe_actions.c:1081
#define RSC_DEMOTE
Definition: crm.h:207
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:45
char * id
Definition: pe_types.h:329
GHashTable * allowed_nodes
Definition: pe_types.h:375