/*
 * Copyright 2004-2023 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */
9
10 #include <crm_internal.h>
11
12 #include <glib.h>
13
14 #include <crm/crm.h>
15 #include <crm/pengine/status.h>
16 #include <pacemaker-internal.h>
17 #include "libpacemaker_private.h"
18
19 /*!
20 * \internal
21 * \brief Check whether a resource is known on a particular node
22 *
23 * \param[in] rsc Resource to check
24 * \param[in] node Node to check
25 *
26 * \return TRUE if resource (or parent if an anonymous clone) is known
27 */
28 static bool
29 rsc_is_known_on(const pe_resource_t *rsc, const pe_node_t *node)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
30 {
31 if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
32 return TRUE;
33
34 } else if ((rsc->variant == pe_native)
35 && pe_rsc_is_anon_clone(rsc->parent)
36 && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
37 /* We check only the parent, not the uber-parent, because we cannot
38 * assume that the resource is known if it is in an anonymously cloned
39 * group (which may be only partially known).
40 */
41 return TRUE;
42 }
43 return FALSE;
44 }
45
46 /*!
47 * \internal
48 * \brief Order a resource's start and promote actions relative to fencing
49 *
50 * \param[in,out] rsc Resource to be ordered
51 * \param[in,out] stonith_op Fence action
52 */
53 static void
54 order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
55 {
56 pe_node_t *target;
57 GList *gIter = NULL;
58
59 CRM_CHECK(stonith_op && stonith_op->node, return);
60 target = stonith_op->node;
61
62 for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
63 pe_action_t *action = (pe_action_t *) gIter->data;
64
65 switch (action->needs) {
66 case rsc_req_nothing:
67 // Anything other than start or promote requires nothing
68 break;
69
70 case rsc_req_stonith:
71 order_actions(stonith_op, action, pe_order_optional);
72 break;
73
74 case rsc_req_quorum:
75 if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
76 && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
77 && !rsc_is_known_on(rsc, target)) {
78
79 /* If we don't know the status of the resource on the node
80 * we're about to shoot, we have to assume it may be active
81 * there. Order the resource start after the fencing. This
82 * is analogous to waiting for all the probes for a resource
83 * to complete before starting it.
84 *
85 * The most likely explanation is that the DC died and took
86 * its status with it.
87 */
88 pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
89 pe__node_name(target));
90 order_actions(stonith_op, action,
91 pe_order_optional | pe_order_runnable_left);
92 }
93 break;
94 }
95 }
96 }
97
98 /*!
99 * \internal
100 * \brief Order a resource's stop and demote actions relative to fencing
101 *
102 * \param[in,out] rsc Resource to be ordered
103 * \param[in,out] stonith_op Fence action
104 */
105 static void
106 order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
107 {
108 GList *gIter = NULL;
109 GList *action_list = NULL;
110 bool order_implicit = false;
111
112 pe_resource_t *top = uber_parent(rsc);
113 pe_action_t *parent_stop = NULL;
114 pe_node_t *target;
115
116 CRM_CHECK(stonith_op && stonith_op->node, return);
117 target = stonith_op->node;
118
119 /* Get a list of stop actions potentially implied by the fencing */
120 action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE);
121
122 /* If resource requires fencing, implicit actions must occur after fencing.
123 *
124 * Implied stops and demotes of resources running on guest nodes are always
125 * ordered after fencing, even if the resource does not require fencing,
126 * because guest node "fencing" is actually just a resource stop.
127 */
128 if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)
129 || pe__is_guest_node(target)) {
130
131 order_implicit = true;
132 }
133
134 if (action_list && order_implicit) {
135 parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
136 }
137
138 for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
139 pe_action_t *action = (pe_action_t *) gIter->data;
140
141 // The stop would never complete, so convert it into a pseudo-action.
142 pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable);
143
144 if (order_implicit) {
145 pe__set_action_flags(action, pe_action_implied_by_stonith);
146
147 /* Order the stonith before the parent stop (if any).
148 *
149 * Also order the stonith before the resource stop, unless the
150 * resource is inside a bundle -- that would cause a graph loop.
151 * We can rely on the parent stop's ordering instead.
152 *
153 * User constraints must not order a resource in a guest node
154 * relative to the guest node container resource. The
155 * pe_order_preserve flag marks constraints as generated by the
156 * cluster and thus immune to that check (and is irrelevant if
157 * target is not a guest).
158 */
159 if (!pe_rsc_is_bundled(rsc)) {
160 order_actions(stonith_op, action, pe_order_preserve);
161 }
162 order_actions(stonith_op, parent_stop, pe_order_preserve);
163 }
164
165 if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
166 crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
167 rsc->id, (order_implicit? "after" : "because"),
168 pe__node_name(target));
169 } else {
170 crm_info("%s is implicit %s %s is fenced",
171 action->uuid, (order_implicit? "after" : "because"),
172 pe__node_name(target));
173 }
174
175 if (pcmk_is_set(rsc->flags, pe_rsc_notify)) {
176 pe__order_notifs_after_fencing(action, rsc, stonith_op);
177 }
178
179 #if 0
180 /* It might be a good idea to stop healthy resources on a node about to
181 * be fenced, when possible.
182 *
183 * However, fencing must be done before a failed resource's
184 * (pseudo-)stop action, so that could create a loop. For example, given
185 * a group of A and B running on node N with a failed stop of B:
186 *
187 * fence N -> stop B (pseudo-op) -> stop A -> fence N
188 *
189 * The block below creates the stop A -> fence N ordering and therefore
190 * must (at least for now) be disabled. Instead, run the block above and
191 * treat all resources on N as B would be (i.e., as a pseudo-op after
192 * the fencing).
193 *
194 * @TODO Maybe break the "A requires B" dependency in
195 * pcmk__update_action_for_orderings() and use this block for healthy
196 * resources instead of the above.
197 */
198 crm_info("Moving healthy resource %s off %s before fencing",
199 rsc->id, pe__node_name(node));
200 pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
201 strdup(CRM_OP_FENCE), stonith_op,
202 pe_order_optional, rsc->cluster);
203 #endif
204 }
205
206 g_list_free(action_list);
207
208 /* Get a list of demote actions potentially implied by the fencing */
209 action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
210
211 for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
212 pe_action_t *action = (pe_action_t *) gIter->data;
213
214 if (!(action->node->details->online) || action->node->details->unclean
215 || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
216
217 if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
218 pe_rsc_info(rsc,
219 "Demote of failed resource %s is implicit after %s is fenced",
220 rsc->id, pe__node_name(target));
221 } else {
222 pe_rsc_info(rsc, "%s is implicit after %s is fenced",
223 action->uuid, pe__node_name(target));
224 }
225
226 /* The demote would never complete and is now implied by the
227 * fencing, so convert it into a pseudo-action.
228 */
229 pe__set_action_flags(action, pe_action_pseudo|pe_action_runnable);
230
231 if (pe_rsc_is_bundled(rsc)) {
232 // Do nothing, let recovery be ordered after parent's implied stop
233
234 } else if (order_implicit) {
235 order_actions(stonith_op, action, pe_order_preserve|pe_order_optional);
236 }
237 }
238 }
239
240 g_list_free(action_list);
241 }
242
243 /*!
244 * \internal
245 * \brief Order resource actions properly relative to fencing
246 *
247 * \param[in,out] rsc Resource whose actions should be ordered
248 * \param[in,out] stonith_op Fencing operation to be ordered against
249 */
250 static void
251 rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
252 {
253 if (rsc->children) {
254 GList *gIter = NULL;
255
256 for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
257 pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
258
259 rsc_stonith_ordering(child_rsc, stonith_op);
260 }
261
262 } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
263 pe_rsc_trace(rsc,
264 "Skipping fencing constraints for unmanaged resource: %s",
265 rsc->id);
266
267 } else {
268 order_start_vs_fencing(rsc, stonith_op);
269 order_stop_vs_fencing(rsc, stonith_op);
270 }
271 }
272
273 /*!
274 * \internal
275 * \brief Order all actions appropriately relative to a fencing operation
276 *
277 * Ensure start operations of affected resources are ordered after fencing,
278 * imply stop and demote operations of affected resources by marking them as
279 * pseudo-actions, etc.
280 *
281 * \param[in,out] stonith_op Fencing operation
282 * \param[in,out] data_set Working set of cluster
283 */
284 void
285 pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
286 {
287 CRM_CHECK(stonith_op && data_set, return);
288 for (GList *r = data_set->resources; r != NULL; r = r->next) {
289 rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op);
290 }
291 }
292
293 /*!
294 * \internal
295 * \brief Order an action after unfencing
296 *
297 * \param[in] rsc Resource that action is for
298 * \param[in,out] node Node that action is on
299 * \param[in,out] action Action to be ordered after unfencing
300 * \param[in] order Ordering flags
301 */
302 void
303 pcmk__order_vs_unfence(const pe_resource_t *rsc, pe_node_t *node,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
304 pe_action_t *action, enum pe_ordering order)
305 {
306 /* When unfencing is in use, we order unfence actions before any probe or
307 * start of resources that require unfencing, and also of fence devices.
308 *
309 * This might seem to violate the principle that fence devices require
310 * only quorum. However, fence agents that unfence often don't have enough
311 * information to even probe or start unless the node is first unfenced.
312 */
313 if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device)
314 && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing))
315 || pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
316
317 /* Start with an optional ordering. Requiring unfencing would result in
318 * the node being unfenced, and all its resources being stopped,
319 * whenever a new resource is added -- which would be highly suboptimal.
320 */
321 pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
322 node->details->data_set);
323
324 order_actions(unfence, action, order);
325
326 if (!pcmk__node_unfenced(node)) {
327 // But unfencing is required if it has never been done
328 char *reason = crm_strdup_printf("required by %s %s",
329 rsc->id, action->task);
330
331 trigger_unfencing(NULL, node, reason, NULL,
332 node->details->data_set);
333 free(reason);
334 }
335 }
336 }
337
338 /*!
339 * \internal
340 * \brief Create pseudo-op for guest node fence, and order relative to it
341 *
342 * \param[in,out] node Guest node to fence
343 */
344 void
345 pcmk__fence_guest(pe_node_t *node)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
346 {
347 pe_resource_t *container = NULL;
348 pe_action_t *stop = NULL;
349 pe_action_t *stonith_op = NULL;
350
351 /* The fence action is just a label; we don't do anything differently for
352 * off vs. reboot. We specify it explicitly, rather than let it default to
353 * cluster's default action, because we are not _initiating_ fencing -- we
354 * are creating a pseudo-event to describe fencing that is already occurring
355 * by other means (container recovery).
356 */
357 const char *fence_action = "off";
358
359 CRM_ASSERT(node != NULL);
360
361 /* Check whether guest's container resource has any explicit stop or
362 * start (the stop may be implied by fencing of the guest's host).
363 */
364 container = node->details->remote_rsc->container;
365 if (container) {
366 stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP,
367 NULL);
368
369 if (find_first_action(container->actions, NULL, CRMD_ACTION_START,
370 NULL)) {
371 fence_action = "reboot";
372 }
373 }
374
375 /* Create a fence pseudo-event, so we have an event to order actions
376 * against, and the controller can always detect it.
377 */
378 stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
379 FALSE, node->details->data_set);
380 pe__set_action_flags(stonith_op, pe_action_pseudo|pe_action_runnable);
381
382 /* We want to imply stops/demotes after the guest is stopped, not wait until
383 * it is restarted, so we always order pseudo-fencing after stop, not start
384 * (even though start might be closer to what is done for a real reboot).
385 */
386 if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
387 pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
388 NULL, FALSE,
389 node->details->data_set);
390
391 crm_info("Implying guest %s is down (action %d) after %s fencing",
392 pe__node_name(node), stonith_op->id,
393 pe__node_name(stop->node));
394 order_actions(parent_stonith_op, stonith_op,
395 pe_order_runnable_left|pe_order_implies_then);
396
397 } else if (stop) {
398 order_actions(stop, stonith_op,
399 pe_order_runnable_left|pe_order_implies_then);
400 crm_info("Implying guest %s is down (action %d) "
401 "after container %s is stopped (action %d)",
402 pe__node_name(node), stonith_op->id,
403 container->id, stop->id);
404 } else {
405 /* If we're fencing the guest node but there's no stop for the guest
406 * resource, we must think the guest is already stopped. However, we may
407 * think so because its resource history was just cleaned. To avoid
408 * unnecessarily considering the guest node down if it's really up,
409 * order the pseudo-fencing after any stop of the connection resource,
410 * which will be ordered after any container (re-)probe.
411 */
412 stop = find_first_action(node->details->remote_rsc->actions, NULL,
413 RSC_STOP, NULL);
414
415 if (stop) {
416 order_actions(stop, stonith_op, pe_order_optional);
417 crm_info("Implying guest %s is down (action %d) "
418 "after connection is stopped (action %d)",
419 pe__node_name(node), stonith_op->id, stop->id);
420 } else {
421 /* Not sure why we're fencing, but everything must already be
422 * cleanly stopped.
423 */
424 crm_info("Implying guest %s is down (action %d) ",
425 pe__node_name(node), stonith_op->id);
426 }
427 }
428
429 // Order/imply other actions relative to pseudo-fence as with real fence
430 pcmk__order_vs_fence(stonith_op, node->details->data_set);
431 }
432
433 /*!
434 * \internal
435 * \brief Check whether node has already been unfenced
436 *
437 * \param[in] node Node to check
438 *
439 * \return true if node has a nonzero #node-unfenced attribute (or none),
440 * otherwise false
441 */
442 bool
443 pcmk__node_unfenced(const pe_node_t *node)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
444 {
445 const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
446
447 return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
448 }
449
450 /*!
451 * \internal
452 * \brief Order a resource's start and stop relative to unfencing of a node
453 *
454 * \param[in,out] data Node that could be unfenced
455 * \param[in,out] user_data Resource to order
456 */
457 void
458 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
459 {
460 pe_node_t *node = (pe_node_t *) data;
461 pe_resource_t *rsc = (pe_resource_t *) user_data;
462
463 pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false,
464 rsc->cluster);
465
466 crm_debug("Ordering any stops of %s before %s, and any starts after",
467 rsc->id, unfence->uuid);
468
469 /*
470 * It would be more efficient to order clone resources once,
471 * rather than order each instance, but ordering the instance
472 * allows us to avoid unnecessary dependencies that might conflict
473 * with user constraints.
474 *
475 * @TODO: This constraint can still produce a transition loop if the
476 * resource has a stop scheduled on the node being unfenced, and
477 * there is a user ordering constraint to start some other resource
478 * (which will be ordered after the unfence) before stopping this
479 * resource. An example is "start some slow-starting cloned service
480 * before stopping an associated virtual IP that may be moving to
481 * it":
482 * stop this -> unfencing -> start that -> stop this
483 */
484 pcmk__new_ordering(rsc, stop_key(rsc), NULL,
485 NULL, strdup(unfence->uuid), unfence,
486 pe_order_optional|pe_order_same_node,
487 rsc->cluster);
488
489 pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
490 rsc, start_key(rsc), NULL,
491 pe_order_implies_then_on_node|pe_order_same_node,
492 rsc->cluster);
493 }