pacemaker  2.1.2-ada5c3b36
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2021 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <glib.h>
13 
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18 
28 static bool
29 rsc_is_known_on(pe_resource_t *rsc, const pe_node_t *node)
30 {
31  if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
32  return TRUE;
33 
34  } else if ((rsc->variant == pe_native)
35  && pe_rsc_is_anon_clone(rsc->parent)
36  && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
37  /* We check only the parent, not the uber-parent, because we cannot
38  * assume that the resource is known if it is in an anonymously cloned
39  * group (which may be only partially known).
40  */
41  return TRUE;
42  }
43  return FALSE;
44 }
45 
/*!
 * \internal
 * \brief Order a resource's actions relative to a fence action
 *
 * Walks rsc->actions and dispatches on each action's \c needs value. In the
 * visible code, rsc_req_nothing actions are left alone, and for
 * rsc_req_quorum a start action is ordered after \p stonith_op when the fence
 * target is among rsc->allowed_nodes and the resource is not known on it
 * (see rsc_is_known_on()).
 *
 * \param[in] rsc         Resource whose actions are examined
 * \param[in] stonith_op  Fence action; must be non-NULL and have a node,
 *                        otherwise CRM_CHECK returns early
 * \param[in] data_set    Not referenced in the visible body
 *
 * NOTE(review): this is a documentation-listing extract; the embedded line
 * numbers jump over 58, 73 and 93, so the block is incomplete as shown.
 * Restore those lines from the upstream file before building.
 */
54 static void
55 order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op,
56  pe_working_set_t *data_set)
57 {
/* NOTE(review): listing line 58 is missing. `target` is assigned below but
 * never declared in the visible text; presumably `pe_node_t *target;` as in
 * order_stop_vs_fencing() -- confirm against upstream.
 */
59  GList *gIter = NULL;
60 
61  CRM_CHECK(stonith_op && stonith_op->node, return);
62  target = stonith_op->node;
63 
64  for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
65  pe_action_t *action = (pe_action_t *) gIter->data;
66 
67  switch (action->needs) {
68  case rsc_req_nothing:
69  // Anything other than start or promote requires nothing
70  break;
71 
72  case rsc_req_stonith:
/* NOTE(review): listing line 73 is missing, so this case looks empty here.
 * Presumably it orders the action after stonith_op -- confirm against
 * upstream.
 */
74  break;
75 
76  case rsc_req_quorum:
77  if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
78  && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
79  && !rsc_is_known_on(rsc, target)) {
80 
81  /* If we don't know the status of the resource on the node
82  * we're about to shoot, we have to assume it may be active
83  * there. Order the resource start after the fencing. This
84  * is analogous to waiting for all the probes for a resource
85  * to complete before starting it.
86  *
87  * The most likely explanation is that the DC died and took
88  * its status with it.
89  */
90  pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
91  target->details->uname);
92  order_actions(stonith_op, action,
/* NOTE(review): listing line 93 is missing; it carries the ordering-flags
 * argument and the closing `);` of the order_actions() call above.
 */
94  }
95  break;
96  }
97  }
98 }
99 
/*!
 * \internal
 * \brief Order a resource's stop and demote actions relative to a fence action
 *
 * Collects the resource's stop actions on the fence target (via
 * pe__resource_actions()), optionally orders them and the top-level parent's
 * stop after \p stonith_op, logs that they are implied by the fencing, and
 * creates a secondary notification when the resource has pe_rsc_notify set.
 * It then does the analogous logging and ordering for demote actions whose
 * node is offline or unclean, or whose resource is failed.
 *
 * \param[in] rsc         Resource whose stop/demote actions are examined
 * \param[in] stonith_op  Fence action; must be non-NULL and have a node,
 *                        otherwise CRM_CHECK returns early
 * \param[in] data_set    Passed through to create_secondary_notification()
 *
 * NOTE(review): this is a documentation-listing extract; the embedded line
 * numbers jump over 132-133, 146, 149, 252 and 258, so the block is
 * incomplete as shown. Restore those lines from the upstream file before
 * building.
 */
108 static void
109 order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op,
110  pe_working_set_t *data_set)
111 {
112  GList *gIter = NULL;
113  GList *action_list = NULL;
114  bool order_implicit = false;
115 
116  pe_resource_t *top = uber_parent(rsc);
117  pe_action_t *parent_stop = NULL;
118  pe_node_t *target;
119 
120  CRM_CHECK(stonith_op && stonith_op->node, return);
121  target = stonith_op->node;
122 
123  /* Get a list of stop actions potentially implied by the fencing */
124  action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE);
125 
126  /* If resource requires fencing, implicit actions must occur after fencing.
127  *
128  * Implied stops and demotes of resources running on guest nodes are always
129  * ordered after fencing, even if the resource does not require fencing,
130  * because guest node "fencing" is actually just a resource stop.
131  */
/* NOTE(review): listing lines 132-133 are missing; they hold the `if (...) {`
 * whose body and closing brace follow. Going by the comment above, the
 * condition presumably tests whether the resource requires fencing or the
 * target is a guest node -- confirm against upstream.
 */
134 
135  order_implicit = true;
136  }
137 
/* The parent's stop is only looked up when there is something to order */
138  if (action_list && order_implicit) {
139  parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
140  }
141 
142  for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
143  pe_action_t *action = (pe_action_t *) gIter->data;
144 
145  // The stop would never complete, so convert it into a pseudo-action.
/* NOTE(review): listing line 146 is missing; presumably the statement that
 * performs the conversion described just above.
 */
147 
148  if (order_implicit) {
/* NOTE(review): listing line 149 is missing; its content cannot be
 * determined from this extract.
 */
150 
151  /* Order the stonith before the parent stop (if any).
152  *
153  * Also order the stonith before the resource stop, unless the
154  * resource is inside a bundle -- that would cause a graph loop.
155  * We can rely on the parent stop's ordering instead.
156  *
157  * User constraints must not order a resource in a guest node
158  * relative to the guest node container resource. The
159  * pe_order_preserve flag marks constraints as generated by the
160  * cluster and thus immune to that check (and is irrelevant if
161  * target is not a guest).
162  */
163  if (!pe_rsc_is_bundled(rsc)) {
164  order_actions(stonith_op, action, pe_order_preserve);
165  }
/* parent_stop may still be NULL here; presumably order_actions() tolerates
 * that ("if any" above) -- TODO confirm.
 */
166  order_actions(stonith_op, parent_stop, pe_order_preserve);
167  }
168 
169  if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
170  crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
171  rsc->id, (order_implicit? "after" : "because"),
172  target->details->uname);
173  } else {
174  crm_info("%s is implicit %s %s is fenced",
175  action->uuid, (order_implicit? "after" : "because"),
176  target->details->uname);
177  }
178 
179  if (pcmk_is_set(rsc->flags, pe_rsc_notify)) {
180  /* Create a second notification that will be delivered
181  * immediately after the node is fenced
182  *
183  * Basic problem:
184  * - C is a clone active on the node to be shot and stopping on another
185  * - R is a resource that depends on C
186  *
187  * + C.stop depends on R.stop
188  * + C.stopped depends on STONITH
189  * + C.notify depends on C.stopped
190  * + C.healthy depends on C.notify
191  * + R.stop depends on C.healthy
192  *
193  * The extra notification here changes
194  * + C.healthy depends on C.notify
195  * into:
196  * + C.healthy depends on C.notify'
197  * + C.notify' depends on STONITH'
198  * thus breaking the loop
199  */
200  create_secondary_notification(action, rsc, stonith_op, data_set);
201  }
202 
/* NOTE(review): the disabled block below refers to `node`, which is not
 * declared anywhere in the visible function (the fence target is `target`);
 * it would need adjusting before it could be re-enabled.
 */
203 #if 0
204  /* It might be a good idea to stop healthy resources on a node about to
205  * be fenced, when possible.
206  *
207  * However, fencing must be done before a failed resource's
208  * (pseudo-)stop action, so that could create a loop. For example, given
209  * a group of A and B running on node N with a failed stop of B:
210  *
211  * fence N -> stop B (pseudo-op) -> stop A -> fence N
212  *
213  * The block below creates the stop A -> fence N ordering and therefore
214  * must (at least for now) be disabled. Instead, run the block above and
215  * treat all resources on N as B would be (i.e., as a pseudo-op after
216  * the fencing).
217  *
218  * @TODO Maybe break the "A requires B" dependency in update_action()
219  * and use this block for healthy resources instead of the above.
220  */
221  crm_info("Moving healthy resource %s off %s before fencing",
222  rsc->id, node->details->uname);
223  pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
224  strdup(CRM_OP_FENCE), stonith_op,
225  pe_order_optional, data_set);
226 #endif
227  }
228 
/* Only the list container is freed; the actions themselves are not */
229  g_list_free(action_list);
230 
231  /* Get a list of demote actions potentially implied by the fencing */
232  action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
233 
234  for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
235  pe_action_t *action = (pe_action_t *) gIter->data;
236 
237  if (!(action->node->details->online) || action->node->details->unclean
238  || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
239 
240  if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
241  pe_rsc_info(rsc,
242  "Demote of failed resource %s is implicit after %s is fenced",
243  rsc->id, target->details->uname);
244  } else {
245  pe_rsc_info(rsc, "%s is implicit after %s is fenced",
246  action->uuid, target->details->uname);
247  }
248 
249  /* The demote would never complete and is now implied by the
250  * fencing, so convert it into a pseudo-action.
251  */
/* NOTE(review): listing line 252 is missing; presumably the statement that
 * performs the conversion described just above.
 */
253 
254  if (pe_rsc_is_bundled(rsc)) {
255  // Do nothing, let recovery be ordered after parent's implied stop
256 
257  } else if (order_implicit) {
/* NOTE(review): listing line 258 is missing, so this branch looks empty here;
 * presumably it orders the demote after stonith_op -- confirm against
 * upstream.
 */
259  }
260  }
261  }
262 
263  g_list_free(action_list);
264 }
265 
274 static void
275 rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op,
276  pe_working_set_t *data_set)
277 {
278  if (rsc->children) {
279  GList *gIter = NULL;
280 
281  for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
282  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
283 
284  rsc_stonith_ordering(child_rsc, stonith_op, data_set);
285  }
286 
287  } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
288  pe_rsc_trace(rsc,
289  "Skipping fencing constraints for unmanaged resource: %s",
290  rsc->id);
291 
292  } else {
293  order_start_vs_fencing(rsc, stonith_op, data_set);
294  order_stop_vs_fencing(rsc, stonith_op, data_set);
295  }
296 }
297 
309 void
311 {
312  CRM_CHECK(stonith_op && data_set, return);
313  for (GList *r = data_set->resources; r != NULL; r = r->next) {
314  rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op, data_set);
315  }
316 }
317 
328 void
330  enum pe_ordering order, pe_working_set_t *data_set)
331 {
332  /* When unfencing is in use, we order unfence actions before any probe or
333  * start of resources that require unfencing, and also of fence devices.
334  *
335  * This might seem to violate the principle that fence devices require
336  * only quorum. However, fence agents that unfence often don't have enough
337  * information to even probe or start unless the node is first unfenced.
338  */
339  if (pcmk__is_unfence_device(rsc, data_set)
341 
342  /* Start with an optional ordering. Requiring unfencing would result in
343  * the node being unfenced, and all its resources being stopped,
344  * whenever a new resource is added -- which would be highly suboptimal.
345  */
346  pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set);
347 
348  order_actions(unfence, action, order);
349 
350  if (!pcmk__node_unfenced(node)) {
351  // But unfencing is required if it has never been done
352  char *reason = crm_strdup_printf("required by %s %s",
353  rsc->id, action->task);
354 
355  trigger_unfencing(NULL, node, reason, NULL, data_set);
356  free(reason);
357  }
358  }
359 }
360 
/*!
 * \internal
 * \brief Create a fence pseudo-event for a guest node and order actions
 *        against it
 *
 * Picks "reboot" as the fence action label if the guest's container resource
 * has a start action, and "off" otherwise. Creates the fence op with
 * pe_fence_op(), then orders it after either fencing of the container's host
 * (when the container's stop is a pseudo-action), the container's stop, or
 * the connection resource's stop -- whichever is found first in that
 * sequence. Finally it calls pcmk__order_vs_fence() so other actions are
 * ordered relative to the pseudo-fence.
 *
 * The body dereferences node->details->remote_rsc unconditionally, so the
 * caller presumably guarantees \c node is a guest node -- TODO confirm.
 *
 * NOTE(review): this is a documentation-listing extract; the embedded line
 * numbers jump over 369, 401, 415 and 419, so the block is incomplete as
 * shown. Restore those lines from the upstream file before building.
 */
368 void
/* NOTE(review): listing line 369 (the declarator) is missing. The
 * cross-reference index at the end of this listing gives the prototype as
 * `void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set)`.
 */
370 {
371  pe_resource_t *container = node->details->remote_rsc->container;
372  pe_action_t *stop = NULL;
373  pe_action_t *stonith_op = NULL;
374 
375  /* The fence action is just a label; we don't do anything differently for
376  * off vs. reboot. We specify it explicitly, rather than let it default to
377  * cluster's default action, because we are not _initiating_ fencing -- we
378  * are creating a pseudo-event to describe fencing that is already occurring
379  * by other means (container recovery).
380  */
381  const char *fence_action = "off";
382 
383  /* Check whether guest's container resource has any explicit stop or
384  * start (the stop may be implied by fencing of the guest's host).
385  */
386  if (container) {
387  stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP,
388  NULL);
389 
390  if (find_first_action(container->actions, NULL, CRMD_ACTION_START,
391  NULL)) {
392  fence_action = "reboot";
393  }
394  }
395 
396  /* Create a fence pseudo-event, so we have an event to order actions
397  * against, and the controller can always detect it.
398  */
399  stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
400  FALSE, data_set);
/* NOTE(review): listing line 401 is missing; presumably it sets flags on
 * stonith_op to make it the pseudo-event described above -- confirm against
 * upstream.
 */
402 
403  /* We want to imply stops/demotes after the guest is stopped, not wait until
404  * it is restarted, so we always order pseudo-fencing after stop, not start
405  * (even though start might be closer to what is done for a real reboot).
406  */
407  if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
408  pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
409  NULL, FALSE, data_set);
410 
411  crm_info("Implying guest node %s is down (action %d) after %s fencing",
412  node->details->uname, stonith_op->id,
413  stop->node->details->uname);
414  order_actions(parent_stonith_op, stonith_op,
/* NOTE(review): listing line 415 is missing; it carries the ordering-flags
 * argument and the closing `);` of the order_actions() call above.
 */
416 
417  } else if (stop) {
/* stop was only assigned inside `if (container)`, so container is non-NULL
 * in this branch.
 */
418  order_actions(stop, stonith_op,
/* NOTE(review): listing line 419 is missing; it carries the ordering-flags
 * argument and the closing `);` of the order_actions() call above.
 */
420  crm_info("Implying guest node %s is down (action %d) "
421  "after container %s is stopped (action %d)",
422  node->details->uname, stonith_op->id,
423  container->id, stop->id);
424  } else {
425  /* If we're fencing the guest node but there's no stop for the guest
426  * resource, we must think the guest is already stopped. However, we may
427  * think so because its resource history was just cleaned. To avoid
428  * unnecessarily considering the guest node down if it's really up,
429  * order the pseudo-fencing after any stop of the connection resource,
430  * which will be ordered after any container (re-)probe.
431  */
432  stop = find_first_action(node->details->remote_rsc->actions, NULL,
433  RSC_STOP, NULL);
434 
435  if (stop) {
436  order_actions(stop, stonith_op, pe_order_optional);
437  crm_info("Implying guest node %s is down (action %d) "
438  "after connection is stopped (action %d)",
439  node->details->uname, stonith_op->id, stop->id);
440  } else {
441  /* Not sure why we're fencing, but everything must already be
442  * cleanly stopped.
443  */
444  crm_info("Implying guest node %s is down (action %d) ",
445  node->details->uname, stonith_op->id);
446  }
447  }
448 
449  // Order/imply other actions relative to pseudo-fence as with real fence
450  pcmk__order_vs_fence(stonith_op, data_set);
451 }
452 
462 bool
464 {
465  const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
466 
467  return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
468 }
469 
480 bool
482  const pe_working_set_t *data_set)
483 {
486 }
bool pcmk__node_unfenced(pe_node_t *node)
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:225
#define RSC_STOP
Definition: crm.h:204
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:359
GHashTable * known_on
Definition: pe_types.h:367
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:19
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:59
#define CRM_OP_FENCE
Definition: crm.h:145
pe_resource_t * container
Definition: pe_types.h:380
GList * children
Definition: pe_types.h:377
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set)
pe_resource_t * remote_rsc
Definition: pe_types.h:230
#define pe_rsc_notify
Definition: pe_types.h:253
bool pcmk__is_unfence_device(const pe_resource_t *rsc, const pe_working_set_t *data_set)
#define RSC_START
Definition: crm.h:201
const char * action
Definition: pcmk_fence.c:30
GList * resources
Definition: pe_types.h:158
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:2242
#define CRMD_ACTION_START
Definition: crm.h:176
#define CRM_ATTR_UNFENCED
Definition: crm.h:121
#define CRMD_ACTION_STOP
Definition: crm.h:179
#define pe_rsc_failed
Definition: pe_types.h:267
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:903
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
#define stop_key(rsc)
Definition: internal.h:376
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:114
struct pe_node_shared_s * details
Definition: pe_types.h:244
pe_node_t * node
Definition: pe_types.h:411
#define pe_rsc_needs_fencing
Definition: pe_types.h:280
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:1905
unsigned long long flags
Definition: pe_types.h:348
const char * uname
Definition: pe_types.h:209
const char * pe_node_attribute_raw(pe_node_t *node, const char *name)
Definition: common.c:635
GList * actions
Definition: pe_types.h:359
enum pe_obj_types variant
Definition: pe_types.h:331
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order, pe_working_set_t *data_set)
const char * id
Definition: pe_types.h:208
#define pe_rsc_fence_device
Definition: pe_types.h:255
const char * target
Definition: pcmk_fence.c:29
void pcmk__fence_guest(pe_node_t *node, pe_working_set_t *data_set)
Cluster status and scheduling.
#define pe_rsc_needs_unfencing
Definition: pe_types.h:281
enum pe_action_flags flags
Definition: pe_types.h:419
#define pe_flag_enable_unfencing
Definition: pe_types.h:100
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:20
unsigned long long flags
Definition: pe_types.h:146
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: utils.c:1643
#define crm_info(fmt, args...)
Definition: logging.h:360
#define pe_rsc_managed
Definition: pe_types.h:249
pe_action_t * find_first_action(GList *input, const char *uuid, const char *task, pe_node_t *on_node)
Definition: utils.c:1540
pe_ordering
Definition: pe_types.h:483
pe_resource_t * parent
Definition: pe_types.h:329
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: utils.c:2135
#define RSC_DEMOTE
Definition: crm.h:209
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:18
void create_secondary_notification(pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op, pe_working_set_t *data_set)
char * id
Definition: pe_types.h:322
GHashTable * allowed_nodes
Definition: pe_types.h:368