pacemaker  2.1.7-0f7f88312f
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 rsc_is_known_on(const pcmk_resource_t *rsc, const pcmk_node_t *node)
30 {
31  if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
32  return TRUE;
33 
34  } else if ((rsc->variant == pcmk_rsc_variant_primitive)
35  && pe_rsc_is_anon_clone(rsc->parent)
36  && (g_hash_table_lookup(rsc->parent->known_on,
37  node->details->id) != NULL)) {
38  /* We check only the parent, not the uber-parent, because we cannot
39  * assume that the resource is known if it is in an anonymously cloned
40  * group (which may be only partially known).
41  */
42  return TRUE;
43  }
44  return FALSE;
45 }
46 
/*!
 * \internal
 * \brief Order a resource's actions relative to a fence action
 *
 * Walks every action in rsc->actions and switches on the action's "needs"
 * value. In the branches visible here, an action is either left alone or
 * ordered after the fence action (stonith_op is passed to order_actions() as
 * the first action).
 *
 * \param[in,out] rsc         Resource whose actions should be ordered
 * \param[in,out] stonith_op  Fence action (checked to be non-NULL and to have
 *                            a node; otherwise the function returns early)
 *
 * NOTE(review): this listing has lost several source lines (the embedded line
 * numbers skip 57, 66, 70-71, 74, 76 and 93-94): the declaration of `target`,
 * the switch case labels, and the trailing arguments of two calls are not
 * visible. Consult the original file before changing any code here.
 */
54 static void
55 order_start_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
56 {
/* NOTE(review): `target` is assigned below but its declaration (line 57) is
 * not visible in this listing.
 */
58 
59  CRM_CHECK(stonith_op && stonith_op->node, return);
60  target = stonith_op->node;
61 
62  for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
63  pcmk_action_t *action = iter->data;
64 
65  switch (action->needs) {
/* NOTE(review): case label (line 66) missing from listing */
67  // Anything other than start or promote requires nothing
68  break;
69 
/* NOTE(review): case label and its statement (lines 70-71) missing from
 * listing
 */
72  break;
73 
/* NOTE(review): case label (line 74) missing from listing. The branch below
 * applies only to start actions, when the fence target is one of the
 * resource's allowed nodes and the resource's state there is unknown.
 */
75  if (pcmk__str_eq(action->task, PCMK_ACTION_START,
77  && (g_hash_table_lookup(rsc->allowed_nodes,
78  target->details->id) != NULL)
79  && !rsc_is_known_on(rsc, target)) {
80 
81  /* If we don't know the status of the resource on the node
82  * we're about to shoot, we have to assume it may be active
83  * there. Order the resource start after the fencing. This
84  * is analogous to waiting for all the probes for a resource
85  * to complete before starting it.
86  *
87  * The most likely explanation is that the DC died and took
88  * its status with it.
89  */
90  pe_rsc_debug(rsc, "Ordering %s after %s recovery",
91  action->uuid, pe__node_name(target));
92  order_actions(stonith_op, action,
95  }
96  break;
97  }
98  }
99 }
100 
/*!
 * \internal
 * \brief Order a resource's stop and demote actions relative to fencing
 *
 * First pass: collects the resource's stop actions for the fence target, logs
 * each as implied by the fencing, orders them (and the top-level parent's
 * stop) after the fence action when implicit ordering applies, and orders
 * clone notifications after fencing if the resource has notifications
 * enabled.
 *
 * Second pass: collects the resource's demote actions for the fence target.
 * Those whose node is offline or unclean, or whose resource is failed, are
 * logged as implied by the fencing and, unless the resource is bundled,
 * ordered after the fence action when implicit ordering applies.
 *
 * \param[in,out] rsc         Resource to be ordered
 * \param[in,out] stonith_op  Fence action (checked to be non-NULL and to have
 *                            a node; otherwise the function returns early)
 *
 * NOTE(review): this listing has lost several source lines (the embedded line
 * numbers skip 117, 131-132, 146, 162, 232-233 and 240): the declaration of
 * `target`, the condition that sets order_implicit, the statements that
 * convert actions into pseudo-actions, and some call arguments are not
 * visible. Consult the original file before changing any code here.
 */
108 static void
109 order_stop_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
110 {
111  GList *iter = NULL;
112  GList *action_list = NULL;
113  bool order_implicit = false;
114 
115  pcmk_resource_t *top = uber_parent(rsc);
116  pcmk_action_t *parent_stop = NULL;
/* NOTE(review): `target` is assigned below but its declaration (line 117) is
 * not visible in this listing.
 */
118 
119  CRM_CHECK(stonith_op && stonith_op->node, return);
120  target = stonith_op->node;
121 
122  /* Get a list of stop actions potentially implied by the fencing */
123  action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE);
124 
125  /* If resource requires fencing, implicit actions must occur after fencing.
126  *
127  * Implied stops and demotes of resources running on guest nodes are always
128  * ordered after fencing, even if the resource does not require fencing,
129  * because guest node "fencing" is actually just a resource stop.
130  */
/* NOTE(review): the `if (...) {` condition guarding the assignment below
 * (lines 131-132) is missing from this listing.
 */
133 
134  order_implicit = true;
135  }
136 
// The parent's stop is only looked up when there is something to order
137  if (action_list && order_implicit) {
138  parent_stop = find_first_action(top->actions, NULL, PCMK_ACTION_STOP,
139  NULL);
140  }
141 
142  for (iter = action_list; iter != NULL; iter = iter->next) {
143  pcmk_action_t *action = iter->data;
144 
145  // The stop would never complete, so convert it into a pseudo-action.
/* NOTE(review): the statement the comment above describes (line 146) is
 * missing from this listing.
 */
147 
148  if (order_implicit) {
149  /* Order the stonith before the parent stop (if any).
150  *
151  * Also order the stonith before the resource stop, unless the
152  * resource is inside a bundle -- that would cause a graph loop.
153  * We can rely on the parent stop's ordering instead.
154  *
155  * User constraints must not order a resource in a guest node
156  * relative to the guest node container resource. The
157  * pcmk__ar_guest_allowed flag marks constraints as generated by the
158  * cluster and thus immune to that check (and is irrelevant if
159  * target is not a guest).
160  */
161  if (!pe_rsc_is_bundled(rsc)) {
/* NOTE(review): body of this branch (line 162) is missing from this listing */
163  }
164  order_actions(stonith_op, parent_stop, pcmk__ar_guest_allowed);
165  }
166 
// Log wording depends on whether the stop is ordered after the fencing
// ("after") or merely implied by it ("because")
167  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
168  crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
169  rsc->id, (order_implicit? "after" : "because"),
170  pe__node_name(target));
171  } else {
172  crm_info("%s is implicit %s %s is fenced",
173  action->uuid, (order_implicit? "after" : "because"),
174  pe__node_name(target));
175  }
176 
177  if (pcmk_is_set(rsc->flags, pcmk_rsc_notify)) {
178  pe__order_notifs_after_fencing(action, rsc, stonith_op);
179  }
180 
181 #if 0
182  /* It might be a good idea to stop healthy resources on a node about to
183  * be fenced, when possible.
184  *
185  * However, fencing must be done before a failed resource's
186  * (pseudo-)stop action, so that could create a loop. For example, given
187  * a group of A and B running on node N with a failed stop of B:
188  *
189  * fence N -> stop B (pseudo-op) -> stop A -> fence N
190  *
191  * The block below creates the stop A -> fence N ordering and therefore
192  * must (at least for now) be disabled. Instead, run the block above and
193  * treat all resources on N as B would be (i.e., as a pseudo-op after
194  * the fencing).
195  *
196  * @TODO Maybe break the "A requires B" dependency in
197  * pcmk__update_action_for_orderings() and use this block for healthy
198  * resources instead of the above.
199  */
200  crm_info("Moving healthy resource %s off %s before fencing",
201  rsc->id, pe__node_name(node));
202  pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
203  strdup(PCMK_ACTION_STONITH), stonith_op,
204  pcmk__ar_ordered, rsc->cluster);
205 #endif
206  }
207 
// Only the list cells are freed here, not the actions they point to
208  g_list_free(action_list);
209 
210  /* Get a list of demote actions potentially implied by the fencing */
211  action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE);
212 
213  for (iter = action_list; iter != NULL; iter = iter->next) {
214  pcmk_action_t *action = iter->data;
215 
216  if (!(action->node->details->online) || action->node->details->unclean
217  || pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
218 
219  if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
220  pe_rsc_info(rsc,
221  "Demote of failed resource %s is implicit "
222  "after %s is fenced",
223  rsc->id, pe__node_name(target));
224  } else {
225  pe_rsc_info(rsc, "%s is implicit after %s is fenced",
226  action->uuid, pe__node_name(target));
227  }
228 
229  /* The demote would never complete and is now implied by the
230  * fencing, so convert it into a pseudo-action.
231  */
/* NOTE(review): the statement the comment above describes (lines 232-233) is
 * missing from this listing.
 */
234 
235  if (pe_rsc_is_bundled(rsc)) {
236  // Recovery will be ordered as usual after parent's implied stop
237 
238  } else if (order_implicit) {
239  order_actions(stonith_op, action,
241  }
242  }
243  }
244 
245  g_list_free(action_list);
246 }
247 
255 static void
256 rsc_stonith_ordering(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
257 {
258  if (rsc->children) {
259  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
260  pcmk_resource_t *child_rsc = iter->data;
261 
262  rsc_stonith_ordering(child_rsc, stonith_op);
263  }
264 
265  } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
266  pe_rsc_trace(rsc,
267  "Skipping fencing constraints for unmanaged resource: %s",
268  rsc->id);
269 
270  } else {
271  order_start_vs_fencing(rsc, stonith_op);
272  order_stop_vs_fencing(rsc, stonith_op);
273  }
274 }
275 
287 void
289 {
290  CRM_CHECK(stonith_op && scheduler, return);
291  for (GList *r = scheduler->resources; r != NULL; r = r->next) {
292  rsc_stonith_ordering((pcmk_resource_t *) r->data, stonith_op);
293  }
294 }
295 
/*!
 * \internal
 * \brief Order an action after the unfencing of a node
 *
 * Creates (or finds) an optional "on" fence action for the node, orders it
 * before the given action using the supplied relation flags, and, if the
 * node has not been unfenced yet, triggers unfencing with a reason naming the
 * resource and the action's task.
 *
 * NOTE(review): the function name and leading parameters are missing from
 * this listing (lines 306-307). The cross-reference index on this page gives
 * the prototype as:
 *   void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node,
 *                               pcmk_action_t *action,
 *                               enum pcmk__action_relation_flags order)
 * which matches the names used in the body -- confirm against the original.
 *
 * \param[in]     rsc     Resource that the action is for (used in the reason)
 * \param[in,out] node    Node that the action is on
 * \param[in,out] action  Action to order after the unfencing
 * \param[in]     order   Relation flags passed to order_actions()
 */
305 void
308  enum pcmk__action_relation_flags order)
309 {
310  /* When unfencing is in use, we order unfence actions before any probe or
311  * start of resources that require unfencing, and also of fence devices.
312  *
313  * This might seem to violate the principle that fence devices require
314  * only quorum. However, fence agents that unfence often don't have enough
315  * information to even probe or start unless the node is first unfenced.
316  */
/* NOTE(review): the `if (...) {` condition that opens the block below (lines
 * 317-319) is missing from this listing; the closing brace at line 339 belongs
 * to it.
 */
320 
321  /* Start with an optional ordering. Requiring unfencing would result in
322  * the node being unfenced, and all its resources being stopped,
323  * whenever a new resource is added -- which would be highly suboptimal.
324  */
325  pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL,
326  FALSE, node->details->data_set);
327 
328  order_actions(unfence, action, order);
329 
330  if (!pcmk__node_unfenced(node)) {
331  // But unfencing is required if it has never been done
332  char *reason = crm_strdup_printf("required by %s %s",
333  rsc->id, action->task);
334 
335  trigger_unfencing(NULL, node, reason, NULL,
336  node->details->data_set);
// reason is freed here right after the trigger_unfencing() call
337  free(reason);
338  }
339  }
340 }
341 
/*!
 * \internal
 * \brief Create a pseudo-fence action for a guest node and order it
 *
 * The pseudo-fence is labelled "off", or "reboot" if the guest's container
 * has a start action. It is then ordered after one of the following, in
 * order of preference:
 *   - fencing of the node hosting the container's stop, when that stop is a
 *     pseudo-action;
 *   - the container's stop action;
 *   - a stop of the guest's connection resource, if one exists.
 * Finally, the cluster's resources are ordered relative to the pseudo-fence
 * via pcmk__order_vs_fence().
 *
 * NOTE(review): the function name and parameter list are missing from this
 * listing (line 349). The cross-reference index on this page gives the
 * prototype as `void pcmk__fence_guest(pcmk_node_t *node)` -- confirm against
 * the original. Lines 384, 399-400 and 404-405 (a statement after the
 * pe_fence_op() call and the trailing arguments of two order_actions() calls)
 * are also missing.
 *
 * \param[in,out] node  Guest node to fence (asserted to be non-NULL)
 */
348 void
350 {
351  pcmk_resource_t *container = NULL;
352  pcmk_action_t *stop = NULL;
353  pcmk_action_t *stonith_op = NULL;
354 
355  /* The fence action is just a label; we don't do anything differently for
356  * off vs. reboot. We specify it explicitly, rather than let it default to
357  * cluster's default action, because we are not _initiating_ fencing -- we
358  * are creating a pseudo-event to describe fencing that is already occurring
359  * by other means (container recovery).
360  */
361  const char *fence_action = PCMK_ACTION_OFF;
362 
363  CRM_ASSERT(node != NULL);
364 
365  /* Check whether guest's container resource has any explicit stop or
366  * start (the stop may be implied by fencing of the guest's host).
367  */
/* NOTE(review): remote_rsc is dereferenced without a NULL check, so this
 * assumes the node has a connection resource -- confirm with callers.
 */
368  container = node->details->remote_rsc->container;
369  if (container) {
370  stop = find_first_action(container->actions, NULL, PCMK_ACTION_STOP,
371  NULL);
372 
373  if (find_first_action(container->actions, NULL, PCMK_ACTION_START,
374  NULL)) {
375  fence_action = PCMK_ACTION_REBOOT;
376  }
377  }
378 
379  /* Create a fence pseudo-event, so we have an event to order actions
380  * against, and the controller can always detect it.
381  */
382  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
383  FALSE, node->details->data_set);
/* NOTE(review): a statement following the call above (line 384) is missing
 * from this listing.
 */
385 
386  /* We want to imply stops/demotes after the guest is stopped, not wait until
387  * it is restarted, so we always order pseudo-fencing after stop, not start
388  * (even though start might be closer to what is done for a real reboot).
389  */
390  if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk_action_pseudo)) {
// Container stop is itself a pseudo-action: order after fencing of its node
391  pcmk_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
392  NULL, FALSE,
393  node->details->data_set);
394 
395  crm_info("Implying guest %s is down (action %d) after %s fencing",
396  pe__node_name(node), stonith_op->id,
397  pe__node_name(stop->node));
398  order_actions(parent_stonith_op, stonith_op,
401 
402  } else if (stop) {
403  order_actions(stop, stonith_op,
406  crm_info("Implying guest %s is down (action %d) "
407  "after container %s is stopped (action %d)",
408  pe__node_name(node), stonith_op->id,
409  container->id, stop->id);
410  } else {
411  /* If we're fencing the guest node but there's no stop for the guest
412  * resource, we must think the guest is already stopped. However, we may
413  * think so because its resource history was just cleaned. To avoid
414  * unnecessarily considering the guest node down if it's really up,
415  * order the pseudo-fencing after any stop of the connection resource,
416  * which will be ordered after any container (re-)probe.
417  */
418  stop = find_first_action(node->details->remote_rsc->actions, NULL,
419  PCMK_ACTION_STOP, NULL);
420 
421  if (stop) {
422  order_actions(stop, stonith_op, pcmk__ar_ordered);
423  crm_info("Implying guest %s is down (action %d) "
424  "after connection is stopped (action %d)",
425  pe__node_name(node), stonith_op->id, stop->id);
426  } else {
427  /* Not sure why we're fencing, but everything must already be
428  * cleanly stopped.
429  */
430  crm_info("Implying guest %s is down (action %d) ",
431  pe__node_name(node), stonith_op->id);
432  }
433  }
434 
435  // Order/imply other actions relative to pseudo-fence as with real fence
436  pcmk__order_vs_fence(stonith_op, node->details->data_set);
437 }
438 
448 bool
450 {
451  const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
452 
453  return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
454 }
455 
/*!
 * \internal
 * \brief Order a resource's stop and start around the unfencing of a node
 *
 * Creates (or finds) an optional "on" fence action for the node, then adds
 * two orderings: the resource's stop before the unfence action, and the
 * unfence action before the resource's start.
 *
 * The (gpointer, gpointer) signature matches GLib's GFunc, so this is
 * presumably meant for use as a list-iteration callback -- confirm with
 * callers.
 *
 * \param[in,out] data       Node being unfenced (a pcmk_node_t *)
 * \param[in,out] user_data  Resource to order (a pcmk_resource_t *)
 *
 * NOTE(review): the relation-flag arguments of both pcmk__new_ordering()
 * calls (lines 491-492 and 497-498) are missing from this listing.
 */
463 void
464 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
465 {
466  pcmk_node_t *node = (pcmk_node_t *) data;
467  pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
468 
469  pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL,
470  false, rsc->cluster);
471 
472  crm_debug("Ordering any stops of %s before %s, and any starts after",
473  rsc->id, unfence->uuid);
474 
475  /*
476  * It would be more efficient to order clone resources once,
477  * rather than order each instance, but ordering the instance
478  * allows us to avoid unnecessary dependencies that might conflict
479  * with user constraints.
480  *
481  * @TODO: This constraint can still produce a transition loop if the
482  * resource has a stop scheduled on the node being unfenced, and
483  * there is a user ordering constraint to start some other resource
484  * (which will be ordered after the unfence) before stopping this
485  * resource. An example is "start some slow-starting cloned service
486  * before stopping an associated virtual IP that may be moving to
487  * it":
488  * stop this -> unfencing -> start that -> stop this
489  */
// First ordering: the resource's stop, then the unfence action
490  pcmk__new_ordering(rsc, stop_key(rsc), NULL,
491  NULL, strdup(unfence->uuid), unfence,
493  rsc->cluster);
494 
// Second ordering: the unfence action, then the resource's start
495  pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
496  rsc, start_key(rsc), NULL,
499  rsc->cluster);
500 }
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:238
Whether resource has clone notifications enabled.
Definition: resources.h:115
Relation applies only if actions are on same node.
A dumping ground.
void trigger_unfencing(pcmk_resource_t *rsc, pcmk_node_t *node, const char *reason, pcmk_action_t *dependency, pcmk_scheduler_t *scheduler)
Definition: utils.c:581
Resource can be recovered after fencing.
Definition: resources.h:61
#define crm_notice(fmt, args...)
Definition: logging.h:383
GHashTable * known_on
Nodes where resource has been probed (key is node ID, not name)
Definition: resources.h:463
'then' is runnable (and migratable) only if 'first' is runnable
pcmk_scheduler_t * cluster
Cluster that resource is part of.
Definition: resources.h:412
Resource can be recovered immediately.
Definition: resources.h:59
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:36
void pcmk__fence_guest(pcmk_node_t *node)
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
char data[0]
Definition: cpg.c:55
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:76
const char * pe_node_attribute_raw(const pcmk_node_t *node, const char *name)
Definition: common.c:621
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
pcmk_resource_t * uber_parent(pcmk_resource_t *rsc)
Definition: complex.c:936
GList * children
Resource's child resources, if any.
Definition: resources.h:475
gboolean order_actions(pcmk_action_t *lh_action, pcmk_action_t *rh_action, uint32_t flags)
Definition: utils.c:450
#define PCMK_ACTION_ON
Definition: actions.h:63
Implementation of pcmk_action_t.
Definition: actions.h:390
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1265
If 'first' is required, 'then' action for instance on same node is.
int id
Counter to identify action.
Definition: actions.h:391
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1588
pcmk_scheduler_t * data_set
Cluster that node is part of.
Definition: nodes.h:126
pcmk_resource_t * container
Resource containing this one, if any.
Definition: resources.h:480
Implementation of pcmk_scheduler_t.
Definition: scheduler.h:172
Ordering applies even if 'first' runs on guest node created by 'then'.
const char * action
Definition: pcmk_fence.c:30
GList * resources
Resources in cluster.
Definition: scheduler.h:196
pcmk__action_relation_flags
#define CRM_ATTR_UNFENCED
Definition: crm.h:120
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
#define PCMK_ACTION_REBOOT
Definition: actions.h:67
pcmk_resource_t * parent
Resource's parent resource, if any.
Definition: resources.h:413
Whether any resource provides or requires unfencing (via CIB resources)
Definition: scheduler.h:86
pcmk_node_t * node
Node to execute action on, if any.
Definition: actions.h:401
Implementation of pcmk_resource_t.
Definition: resources.h:399
#define crm_debug(fmt, args...)
Definition: logging.h:386
Actions are ordered (optionally, if no other flags are set)
void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
Primitive resource.
Definition: resources.h:34
#define stop_key(rsc)
Definition: internal.h:378
Whether resource is considered failed.
Definition: resources.h:151
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1, 2)
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:99
struct pe_node_shared_s * details
Basic node information.
Definition: nodes.h:134
#define PCMK_ACTION_START
Definition: actions.h:71
unsigned long long flags
Group of enum pcmk_rsc_flags.
Definition: resources.h:429
#define PCMK_ACTION_STOP
Definition: actions.h:74
GList * actions
Definition: resources.h:447
#define PCMK_ACTION_STONITH
Definition: actions.h:73
char * uuid
Action key.
Definition: actions.h:404
Implementation of pcmk_node_t.
Definition: nodes.h:130
enum pe_obj_types variant
Resource variant.
Definition: resources.h:414
Resource can be recovered if quorate.
Definition: resources.h:60
const char * id
Node ID at the cluster layer.
Definition: nodes.h:67
const char * target
Definition: pcmk_fence.c:29
void pcmk__order_vs_fence(pcmk_action_t *stonith_op, pcmk_scheduler_t *scheduler)
bool pe__is_guest_node(const pcmk_node_t *node)
Definition: remote.c:33
Cluster status and scheduling.
Whether resource requires fencing before recovery if on unclean node.
Definition: resources.h:190
Whether resource's class is "stonith".
Definition: resources.h:121
Whether action is runnable.
Definition: actions.h:241
void pe__order_notifs_after_fencing(const pcmk_action_t *action, pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
Definition: pe_notif.c:992
pcmk_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pcmk_node_t *on_node)
Definition: pe_actions.c:1486
pcmk_scheduler_t * scheduler
Whether action does not require invoking an agent.
Definition: actions.h:238
#define CRM_ASSERT(expr)
Definition: results.h:42
enum pe_action_flags flags
Group of enum pe_action_flags.
Definition: actions.h:409
#define PCMK_ACTION_OFF
Definition: actions.h:62
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:37
#define start_key(rsc)
Definition: internal.h:384
unsigned long long flags
Group of enum pcmk_scheduler_flags.
Definition: scheduler.h:183
Whether resource is managed.
Definition: resources.h:106
Whether resource can be started or promoted only on unfenced nodes.
Definition: resources.h:193
#define crm_info(fmt, args...)
Definition: logging.h:384
pcmk_resource_t * remote_rsc
Remote connection resource for node, if it is a Pacemaker Remote node.
Definition: nodes.h:111
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:35
char * id
Resource ID in configuration.
Definition: resources.h:400
GHashTable * allowed_nodes
Nodes where resource may run (key is node ID, not name)
Definition: resources.h:466
bool pcmk__node_unfenced(const pcmk_node_t *node)