pacemaker  2.1.9-49aab99839
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 rsc_is_known_on(const pcmk_resource_t *rsc, const pcmk_node_t *node)
30 {
31  if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
32  return TRUE;
33 
34  } else if (pcmk__is_primitive(rsc)
35  && pcmk__is_anonymous_clone(rsc->parent)
36  && (g_hash_table_lookup(rsc->parent->known_on,
37  node->details->id) != NULL)) {
38  /* We check only the parent, not the uber-parent, because we cannot
39  * assume that the resource is known if it is in an anonymously cloned
40  * group (which may be only partially known).
41  */
42  return TRUE;
43  }
44  return FALSE;
45 }
46 
/*
 * Order a resource's actions after the fencing of a node where required.
 *
 * Iterates over rsc->actions and switches on each action's `needs` value.
 * In the last visible branch, a PCMK_ACTION_START action is ordered after
 * stonith_op (via order_actions()) when the fencing target is present in
 * rsc->allowed_nodes and rsc_is_known_on() returns false for it.
 *
 * rsc:        resource whose actions are examined
 * stonith_op: fence action; the CRM_CHECK guard's failure action is `return`,
 *             so nothing is done if it or its node is NULL
 *
 * NOTE(review): the embedded line numbering skips 57, 66, 70-71, 74, 76 and
 * 93-94. Judging from what remains, this listing lacks the declaration of
 * `target`, the `case` labels of the switch (plus the body of the middle
 * case), the last argument of pcmk__str_eq(), and the flag argument of
 * order_actions(). Restore them from the original file before building.
 */
54 static void
55 order_start_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
56 {
// NOTE(review): `target` is assigned below but its declaration is not shown
58 
59  CRM_CHECK(stonith_op && stonith_op->node, return);
60  target = stonith_op->node;
61 
62  for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
63  pcmk_action_t *action = iter->data;
64 
65  switch (action->needs) {
// NOTE(review): the case labels of this switch are not shown in this listing
67  // Anything other than start or promote requires nothing
68  break;
69 
72  break;
73 
75  if (pcmk__str_eq(action->task, PCMK_ACTION_START,
77  && (g_hash_table_lookup(rsc->allowed_nodes,
78  target->details->id) != NULL)
79  && !rsc_is_known_on(rsc, target)) {
80 
81  /* If we don't know the status of the resource on the node
82  * we're about to shoot, we have to assume it may be active
83  * there. Order the resource start after the fencing. This
84  * is analogous to waiting for all the probes for a resource
85  * to complete before starting it.
86  *
87  * The most likely explanation is that the DC died and took
88  * its status with it.
89  */
90  pcmk__rsc_debug(rsc, "Ordering %s after %s recovery",
91  action->uuid, pcmk__node_name(target));
92  order_actions(stonith_op, action,
95  }
96  break;
97  }
98  }
99 }
100 
/*
 * Order a resource's stop and demote actions relative to the fencing of a
 * node.
 *
 * Visible behavior:
 * - Collects the resource's stop actions for the fencing target with
 *   pe__resource_actions(). When order_implicit is set, the fence action is
 *   ordered before the first stop action found on uber_parent(rsc).
 * - Logs each implied stop, and calls pe__order_notifs_after_fencing() when
 *   the resource has the pcmk_rsc_notify flag.
 * - Collects demote actions the same way. Those whose node is offline or
 *   unclean, or whose resource has the pcmk_rsc_failed flag, are logged and,
 *   unless the resource is bundled, ordered after the fence action when
 *   order_implicit is set.
 * - Both action lists are released with g_list_free().
 *
 * rsc:        resource whose stop/demote actions are examined
 * stonith_op: fence action; the CRM_CHECK guard's failure action is `return`,
 *             so nothing is done if it or its node is NULL
 *
 * NOTE(review): the embedded line numbering skips 117, 131, 146, 162, 232-233
 * and 240. Judging from what remains, this listing lacks the declaration of
 * `target`, the opening of the `if` that sets order_implicit, the statements
 * announced by the two "convert it into a pseudo-action" comments, the body
 * of the `!pcmk__is_bundled(rsc)` branch, and the flag argument of the final
 * order_actions() call. Restore them from the original file before building.
 *
 * NOTE(review): the disabled `#if 0` block refers to `node`, which is not
 * declared in the visible part of this function -- presumably `target` was
 * meant; confirm before ever enabling it.
 */
108 static void
109 order_stop_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
110 {
111  GList *iter = NULL;
112  GList *action_list = NULL;
113  bool order_implicit = false;
114 
115  pcmk_resource_t *top = uber_parent(rsc);
116  pcmk_action_t *parent_stop = NULL;
// NOTE(review): `target` is assigned below but its declaration is not shown
118 
119  CRM_CHECK(stonith_op && stonith_op->node, return);
120  target = stonith_op->node;
121 
122  /* Get a list of stop actions potentially implied by the fencing */
123  action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE);
124 
125  /* If resource requires fencing, implicit actions must occur after fencing.
126  *
127  * Implied stops and demotes of resources running on guest nodes are always
128  * ordered after fencing, even if the resource does not require fencing,
129  * because guest node "fencing" is actually just a resource stop.
130  */
// NOTE(review): the first half of this condition (the `if (` line) is not shown
132  || pcmk__is_guest_or_bundle_node(target)) {
133 
134  order_implicit = true;
135  }
136 
137  if (action_list && order_implicit) {
138  parent_stop = find_first_action(top->actions, NULL, PCMK_ACTION_STOP,
139  NULL);
140  }
141 
142  for (iter = action_list; iter != NULL; iter = iter->next) {
143  pcmk_action_t *action = iter->data;
144 
145  // The stop would never complete, so convert it into a pseudo-action.
// NOTE(review): the statement performing that conversion is not shown
147 
148  if (order_implicit) {
149  /* Order the stonith before the parent stop (if any).
150  *
151  * Also order the stonith before the resource stop, unless the
152  * resource is inside a bundle -- that would cause a graph loop.
153  * We can rely on the parent stop's ordering instead.
154  *
155  * User constraints must not order a resource in a guest node
156  * relative to the guest node container resource. The
157  * pcmk__ar_guest_allowed flag marks constraints as generated by the
158  * cluster and thus immune to that check (and is irrelevant if
159  * target is not a guest).
160  */
161  if (!pcmk__is_bundled(rsc)) {
// NOTE(review): the body of this branch is not shown
163  }
164  order_actions(stonith_op, parent_stop, pcmk__ar_guest_allowed);
165  }
166 
167  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
168  crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
169  rsc->id, (order_implicit? "after" : "because"),
170  pcmk__node_name(target));
171  } else {
172  crm_info("%s is implicit %s %s is fenced",
173  action->uuid, (order_implicit? "after" : "because"),
174  pcmk__node_name(target));
175  }
176 
177  if (pcmk_is_set(rsc->flags, pcmk_rsc_notify)) {
178  pe__order_notifs_after_fencing(action, rsc, stonith_op);
179  }
180 
181 #if 0
182  /* It might be a good idea to stop healthy resources on a node about to
183  * be fenced, when possible.
184  *
185  * However, fencing must be done before a failed resource's
186  * (pseudo-)stop action, so that could create a loop. For example, given
187  * a group of A and B running on node N with a failed stop of B:
188  *
189  * fence N -> stop B (pseudo-op) -> stop A -> fence N
190  *
191  * The block below creates the stop A -> fence N ordering and therefore
192  * must (at least for now) be disabled. Instead, run the block above and
193  * treat all resources on N as B would be (i.e., as a pseudo-op after
194  * the fencing).
195  *
196  * @TODO Maybe break the "A requires B" dependency in
197  * pcmk__update_action_for_orderings() and use this block for healthy
198  * resources instead of the above.
199  */
200  crm_info("Moving healthy resource %s off %s before fencing",
201  rsc->id, pcmk__node_name(node));
202  pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
203  strdup(PCMK_ACTION_STONITH), stonith_op,
204  pcmk__ar_ordered, rsc->cluster);
205 #endif
206  }
207 
208  g_list_free(action_list);
209 
210  /* Get a list of demote actions potentially implied by the fencing */
211  action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE);
212 
213  for (iter = action_list; iter != NULL; iter = iter->next) {
214  pcmk_action_t *action = iter->data;
215 
216  if (!(action->node->details->online) || action->node->details->unclean
217  || pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
218 
219  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
220  pcmk__rsc_info(rsc,
221  "Demote of failed resource %s is implicit "
222  "after %s is fenced",
223  rsc->id, pcmk__node_name(target));
224  } else {
225  pcmk__rsc_info(rsc, "%s is implicit after %s is fenced",
226  action->uuid, pcmk__node_name(target));
227  }
228 
229  /* The demote would never complete and is now implied by the
230  * fencing, so convert it into a pseudo-action.
231  */
// NOTE(review): the statement performing that conversion is not shown
234 
235  if (pcmk__is_bundled(rsc)) {
236  // Recovery will be ordered as usual after parent's implied stop
237 
238  } else if (order_implicit) {
239  order_actions(stonith_op, action,
241  }
242  }
243  }
244 
245  g_list_free(action_list);
246 }
247 
255 static void
256 rsc_stonith_ordering(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
257 {
258  if (rsc->children) {
259  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
260  pcmk_resource_t *child_rsc = iter->data;
261 
262  rsc_stonith_ordering(child_rsc, stonith_op);
263  }
264 
265  } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
266  pcmk__rsc_trace(rsc,
267  "Skipping fencing constraints for unmanaged resource: "
268  "%s", rsc->id);
269 
270  } else {
271  order_start_vs_fencing(rsc, stonith_op);
272  order_stop_vs_fencing(rsc, stonith_op);
273  }
274 }
275 
287 void
289 {
290  CRM_CHECK(stonith_op && scheduler, return);
291  for (GList *r = scheduler->resources; r != NULL; r = r->next) {
292  rsc_stonith_ordering((pcmk_resource_t *) r->data, stonith_op);
293  }
294 }
295 
/*
 * Order an action after the unfencing of a node.
 *
 * Visible behavior: obtains an optional PCMK_ACTION_ON fence operation for the
 * node from pe_fence_op(), orders it before `action` using the caller-supplied
 * relation flags, and -- if pcmk__node_unfenced() returns false for the node --
 * calls trigger_unfencing() with a "required by <rsc id> <task>" reason string
 * that is freed afterwards.
 *
 * NOTE(review): the embedded line numbering skips 306-307 and 317-319, so the
 * function name with its leading parameters and the `if` that guards the body
 * are absent from this listing (the extra closing brace near the end belongs
 * to that `if`). The reference index at the end of the listing gives the
 * prototype as
 *   void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node,
 *                               pcmk_action_t *action,
 *                               enum pcmk__action_relation_flags order)
 * The guarding condition cannot be recovered from what is shown here.
 */
305 void
308  enum pcmk__action_relation_flags order)
309 {
310  /* When unfencing is in use, we order unfence actions before any probe or
311  * start of resources that require unfencing, and also of fence devices.
312  *
313  * This might seem to violate the principle that fence devices require
314  * only quorum. However, fence agents that unfence often don't have enough
315  * information to even probe or start unless the node is first unfenced.
316  */
// NOTE(review): the `if (...) {` that opens the guarded body is not shown
320 
321  /* Start with an optional ordering. Requiring unfencing would result in
322  * the node being unfenced, and all its resources being stopped,
323  * whenever a new resource is added -- which would be highly suboptimal.
324  */
325  pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL,
326  FALSE, node->details->data_set);
327 
328  order_actions(unfence, action, order);
329 
330  if (!pcmk__node_unfenced(node)) {
331  // But unfencing is required if it has never been done
332  char *reason = crm_strdup_printf("required by %s %s",
333  rsc->id, action->task);
334 
335  trigger_unfencing(NULL, node, reason, NULL,
336  node->details->data_set);
337  free(reason);
338  }
339  }
340 }
341 
/*
 * Create a fence pseudo-event for a guest node and order it relative to the
 * stop of the guest's container (or connection) resource.
 *
 * Visible behavior:
 * - The fence action label defaults to PCMK_ACTION_OFF and becomes
 *   PCMK_ACTION_REBOOT when the container has a start action.
 * - The fence operation is obtained from pe_fence_op() with the reason
 *   "guest is unclean".
 * - If the container has a stop action flagged pcmk_action_pseudo, the fence
 *   operation of that stop's node is ordered before the guest's fence
 *   operation; if the stop is a regular action, the stop itself is ordered
 *   before it; with no container stop, any stop of the connection resource
 *   is ordered before it instead.
 * - Finally pcmk__order_vs_fence() orders all other actions against the
 *   fence operation.
 *
 * NOTE(review): the embedded line numbering skips 349, 384, 399-400 and
 * 404-405, so the function name with its parameter list, one statement after
 * the pe_fence_op() call, and the flag arguments of two order_actions() calls
 * are absent from this listing. The reference index at the end of the listing
 * gives the prototype as
 *   void pcmk__fence_guest(pcmk_node_t *node)
 *
 * NOTE(review): node->details->remote_rsc is dereferenced without a NULL
 * check -- presumably callers only pass guest nodes; confirm.
 */
348 void
350 {
351  pcmk_resource_t *container = NULL;
352  pcmk_action_t *stop = NULL;
353  pcmk_action_t *stonith_op = NULL;
354 
355  /* The fence action is just a label; we don't do anything differently for
356  * off vs. reboot. We specify it explicitly, rather than let it default to
357  * cluster's default action, because we are not _initiating_ fencing -- we
358  * are creating a pseudo-event to describe fencing that is already occurring
359  * by other means (container recovery).
360  */
361  const char *fence_action = PCMK_ACTION_OFF;
362 
363  pcmk__assert(node != NULL);
364 
365  /* Check whether guest's container resource has any explicit stop or
366  * start (the stop may be implied by fencing of the guest's host).
367  */
368  container = node->details->remote_rsc->container;
369  if (container) {
370  stop = find_first_action(container->actions, NULL, PCMK_ACTION_STOP,
371  NULL);
372 
373  if (find_first_action(container->actions, NULL, PCMK_ACTION_START,
374  NULL)) {
375  fence_action = PCMK_ACTION_REBOOT;
376  }
377  }
378 
379  /* Create a fence pseudo-event, so we have an event to order actions
380  * against, and the controller can always detect it.
381  */
382  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
383  FALSE, node->details->data_set);
// NOTE(review): one statement (original line 384) is not shown here
385 
386  /* We want to imply stops/demotes after the guest is stopped, not wait until
387  * it is restarted, so we always order pseudo-fencing after stop, not start
388  * (even though start might be closer to what is done for a real reboot).
389  */
390  if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk_action_pseudo)) {
391  pcmk_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
392  NULL, FALSE,
393  node->details->data_set);
394 
395  crm_info("Implying guest %s is down (action %d) after %s fencing",
396  pcmk__node_name(node), stonith_op->id,
397  pcmk__node_name(stop->node));
398  order_actions(parent_stonith_op, stonith_op,
401 
402  } else if (stop) {
403  order_actions(stop, stonith_op,
406  crm_info("Implying guest %s is down (action %d) "
407  "after container %s is stopped (action %d)",
408  pcmk__node_name(node), stonith_op->id,
409  container->id, stop->id);
410  } else {
411  /* If we're fencing the guest node but there's no stop for the guest
412  * resource, we must think the guest is already stopped. However, we may
413  * think so because its resource history was just cleaned. To avoid
414  * unnecessarily considering the guest node down if it's really up,
415  * order the pseudo-fencing after any stop of the connection resource,
416  * which will be ordered after any container (re-)probe.
417  */
418  stop = find_first_action(node->details->remote_rsc->actions, NULL,
419  PCMK_ACTION_STOP, NULL);
420 
421  if (stop) {
422  order_actions(stop, stonith_op, pcmk__ar_ordered);
423  crm_info("Implying guest %s is down (action %d) "
424  "after connection is stopped (action %d)",
425  pcmk__node_name(node), stonith_op->id, stop->id);
426  } else {
427  /* Not sure why we're fencing, but everything must already be
428  * cleanly stopped.
429  */
430  crm_info("Implying guest %s is down (action %d) ",
431  pcmk__node_name(node), stonith_op->id);
432  }
433  }
434 
435  // Order/imply other actions relative to pseudo-fence as with real fence
436  pcmk__order_vs_fence(stonith_op, node->details->data_set);
437 }
438 
/*
 * Check whether a node has been unfenced.
 *
 * Reads the node's CRM_ATTR_UNFENCED attribute with pcmk__node_attr() and
 * returns the negation of comparing it with "0": false when the value is "0"
 * (and presumably also when the attribute is unset, given the
 * pcmk__str_null_matches flag -- confirm), true otherwise.
 *
 * NOTE(review): the embedded line numbering skips 449 and 452, so the function
 * name with its parameter list and the last argument of pcmk__node_attr() are
 * absent from this listing. The reference index at the end of the listing
 * gives the prototype as
 *   bool pcmk__node_unfenced(const pcmk_node_t *node)
 */
448 bool
450 {
451  const char *unfenced = pcmk__node_attr(node, CRM_ATTR_UNFENCED, NULL,
453 
454  return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
455 }
456 
/*
 * Order a resource's stop before, and its start after, the unfencing of a
 * node.
 *
 * Obtains an optional PCMK_ACTION_ON fence operation for the node from
 * pe_fence_op(), then creates two orderings with pcmk__new_ordering(): the
 * resource's stop (stop_key) before the unfence operation, and the unfence
 * operation before the resource's start (start_key).
 *
 * data:      node being unfenced (cast to pcmk_node_t *)
 * user_data: resource to order (cast to pcmk_resource_t *)
 *
 * The gpointer parameters presumably let this serve as a GLib iteration
 * callback -- confirm against the callers.
 *
 * NOTE(review): the embedded line numbering skips 493 and 498-499, so the
 * relation-flag argument of both pcmk__new_ordering() calls is absent from
 * this listing. Restore it from the original file before building.
 */
464 void
465 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
466 {
467  pcmk_node_t *node = (pcmk_node_t *) data;
468  pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
469 
470  pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL,
471  false, rsc->cluster);
472 
473  crm_debug("Ordering any stops of %s before %s, and any starts after",
474  rsc->id, unfence->uuid);
475 
476  /*
477  * It would be more efficient to order clone resources once,
478  * rather than order each instance, but ordering the instance
479  * allows us to avoid unnecessary dependencies that might conflict
480  * with user constraints.
481  *
482  * @TODO: This constraint can still produce a transition loop if the
483  * resource has a stop scheduled on the node being unfenced, and
484  * there is a user ordering constraint to start some other resource
485  * (which will be ordered after the unfence) before stopping this
486  * resource. An example is "start some slow-starting cloned service
487  * before stopping an associated virtual IP that may be moving to
488  * it":
489  * stop this -> unfencing -> start that -> stop this
490  */
491  pcmk__new_ordering(rsc, stop_key(rsc), NULL,
492  NULL, strdup(unfence->uuid), unfence,
494  rsc->cluster);
495 
496  pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
497  rsc, start_key(rsc), NULL,
500  rsc->cluster);
501 }
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:245
Relation applies only if actions are on same node.
A dumping ground.
void trigger_unfencing(pcmk_resource_t *rsc, pcmk_node_t *node, const char *reason, pcmk_action_t *dependency, pcmk_scheduler_t *scheduler)
Definition: utils.c:591
#define crm_notice(fmt, args...)
Definition: logging.h:397
GHashTable * known_on
Definition: resources.h:459
'then' is runnable (and migratable) only if 'first' is runnable
pcmk_scheduler_t * cluster
Definition: resources.h:408
void pcmk__fence_guest(pcmk_node_t *node)
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
char data[0]
Definition: cpg.c:58
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:1046
GList * children
Definition: resources.h:471
#define pcmk__rsc_trace(rsc, fmt, args...)
#define PCMK_ACTION_ON
Definition: actions.h:64
#define pcmk__rsc_info(rsc, fmt, args...)
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1281
If 'first' is required, 'then' action for instance on same node is.
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1553
pcmk_scheduler_t * data_set
Definition: nodes.h:154
pcmk_resource_t * container
Definition: resources.h:476
Ordering applies even if 'first' runs on guest node created by 'then'.
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: scheduler.h:231
#define pcmk__rsc_debug(rsc, fmt, args...)
pcmk__action_relation_flags
#define CRM_ATTR_UNFENCED
Definition: crm.h:106
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
#define PCMK_ACTION_REBOOT
Definition: actions.h:68
pcmk_resource_t * parent
Definition: resources.h:409
pcmk_node_t * node
Definition: actions.h:341
#define crm_debug(fmt, args...)
Definition: logging.h:402
Actions are ordered (optionally, if no other flags are set)
void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
#define stop_key(rsc)
Definition: internal.h:213
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:94
struct pe_node_shared_s * details
Definition: nodes.h:168
#define PCMK_ACTION_START
Definition: actions.h:72
unsigned long long flags
Definition: resources.h:428
#define PCMK_ACTION_STOP
Definition: actions.h:75
GList * actions
Definition: resources.h:444
#define PCMK_ACTION_STONITH
Definition: actions.h:74
char * uuid
Definition: actions.h:344
gboolean order_actions(pcmk_action_t *first, pcmk_action_t *then, uint32_t flags)
Definition: utils.c:457
const char * id
Definition: nodes.h:73
#define pcmk__assert(expr)
const char * target
Definition: pcmk_fence.c:29
void pcmk__order_vs_fence(pcmk_action_t *stonith_op, pcmk_scheduler_t *scheduler)
Cluster status and scheduling.
void pe__order_notifs_after_fencing(const pcmk_action_t *action, pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
Definition: pe_notif.c:988
pcmk_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pcmk_node_t *on_node)
Definition: pe_actions.c:1451
pcmk_scheduler_t * scheduler
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:118
enum pe_action_flags flags
Definition: actions.h:349
#define pcmk__set_action_flags(action, flags_to_set)
#define PCMK_ACTION_OFF
Definition: actions.h:63
#define start_key(rsc)
Definition: internal.h:219
unsigned long long flags
Definition: scheduler.h:211
#define crm_info(fmt, args...)
Definition: logging.h:399
pcmk_resource_t * remote_rsc
Definition: nodes.h:136
GHashTable * allowed_nodes
Definition: resources.h:462
Where resource is running.
bool pcmk__node_unfenced(const pcmk_node_t *node)