root/lib/pacemaker/pcmk_sched_fencing.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rsc_is_known_on
  2. order_start_vs_fencing
  3. order_stop_vs_fencing
  4. rsc_stonith_ordering
  5. pcmk__order_vs_fence
  6. pcmk__order_vs_unfence
  7. pcmk__fence_guest
  8. pcmk__node_unfenced
  9. pcmk__order_restart_vs_unfence

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 #include <crm/crm.h>
  15 #include <crm/pengine/status.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Check whether a resource is known on a particular node
  22  *
  23  * \param[in] rsc   Resource to check
  24  * \param[in] node  Node to check
  25  *
  26  * \return TRUE if resource (or parent if an anonymous clone) is known
  27  */
  28 static bool
  29 rsc_is_known_on(const pcmk_resource_t *rsc, const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  30 {
  31    if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
  32        return TRUE;
  33 
  34    } else if (pcmk__is_primitive(rsc)
  35               && pcmk__is_anonymous_clone(rsc->parent)
  36               && (g_hash_table_lookup(rsc->parent->known_on,
  37                                       node->details->id) != NULL)) {
  38        /* We check only the parent, not the uber-parent, because we cannot
  39         * assume that the resource is known if it is in an anonymously cloned
  40         * group (which may be only partially known).
  41         */
  42        return TRUE;
  43    }
  44    return FALSE;
  45 }
  46 
  47 /*!
  48  * \internal
  49  * \brief Order a resource's start and promote actions relative to fencing
  50  *
  51  * \param[in,out] rsc         Resource to be ordered
  52  * \param[in,out] stonith_op  Fence action
  53  */
  54 static void
  55 order_start_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
     /* [previous][next][first][last][top][bottom][index][help] */
  56 {
  57     pcmk_node_t *target;
  58 
  59     CRM_CHECK(stonith_op && stonith_op->node, return);
  60     target = stonith_op->node;
  61 
  62     for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) {
  63         pcmk_action_t *action = iter->data;
  64 
  65         switch (action->needs) {
  66             case pcmk_requires_nothing:
  67                 // Anything other than start or promote requires nothing
  68                 break;
  69 
  70             case pcmk_requires_fencing:
  71                 order_actions(stonith_op, action, pcmk__ar_ordered);
  72                 break;
  73 
  74             case pcmk_requires_quorum:
  75                 if (pcmk__str_eq(action->task, PCMK_ACTION_START,
  76                                  pcmk__str_none)
  77                     && (g_hash_table_lookup(rsc->allowed_nodes,
  78                                             target->details->id) != NULL)
  79                     && !rsc_is_known_on(rsc, target)) {
  80 
  81                     /* If we don't know the status of the resource on the node
  82                      * we're about to shoot, we have to assume it may be active
  83                      * there. Order the resource start after the fencing. This
  84                      * is analogous to waiting for all the probes for a resource
  85                      * to complete before starting it.
  86                      *
  87                      * The most likely explanation is that the DC died and took
  88                      * its status with it.
  89                      */
  90                     pcmk__rsc_debug(rsc, "Ordering %s after %s recovery",
  91                                     action->uuid, pcmk__node_name(target));
  92                     order_actions(stonith_op, action,
  93                                   pcmk__ar_ordered
  94                                   |pcmk__ar_unrunnable_first_blocks);
  95                 }
  96                 break;
  97         }
  98     }
  99 }
 100 
 101 /*!
 102  * \internal
 103  * \brief Order a resource's stop and demote actions relative to fencing
 104  *
 105  * \param[in,out] rsc         Resource to be ordered
 106  * \param[in,out] stonith_op  Fence action
 107  */
 108 static void
 109 order_stop_vs_fencing(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
     /* [previous][next][first][last][top][bottom][index][help] */
 110 {
 111     GList *iter = NULL;
 112     GList *action_list = NULL;
 113     bool order_implicit = false;
 114 
 115     pcmk_resource_t *top = uber_parent(rsc);
 116     pcmk_action_t *parent_stop = NULL;
 117     pcmk_node_t *target;
 118 
 119     CRM_CHECK(stonith_op && stonith_op->node, return);
 120     target = stonith_op->node;
 121 
 122     /* Get a list of stop actions potentially implied by the fencing */
 123     action_list = pe__resource_actions(rsc, target, PCMK_ACTION_STOP, FALSE);
 124 
 125     /* If resource requires fencing, implicit actions must occur after fencing.
 126      *
 127      * Implied stops and demotes of resources running on guest nodes are always
 128      * ordered after fencing, even if the resource does not require fencing,
 129      * because guest node "fencing" is actually just a resource stop.
 130      */
 131     if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
 132         || pcmk__is_guest_or_bundle_node(target)) {
 133 
 134         order_implicit = true;
 135     }
 136 
 137     if (action_list && order_implicit) {
 138         parent_stop = find_first_action(top->actions, NULL, PCMK_ACTION_STOP,
 139                                         NULL);
 140     }
 141 
 142     for (iter = action_list; iter != NULL; iter = iter->next) {
 143         pcmk_action_t *action = iter->data;
 144 
 145         // The stop would never complete, so convert it into a pseudo-action.
 146         pcmk__set_action_flags(action, pcmk_action_pseudo|pcmk_action_runnable);
 147 
 148         if (order_implicit) {
 149             /* Order the stonith before the parent stop (if any).
 150              *
 151              * Also order the stonith before the resource stop, unless the
 152              * resource is inside a bundle -- that would cause a graph loop.
 153              * We can rely on the parent stop's ordering instead.
 154              *
 155              * User constraints must not order a resource in a guest node
 156              * relative to the guest node container resource. The
 157              * pcmk__ar_guest_allowed flag marks constraints as generated by the
 158              * cluster and thus immune to that check (and is irrelevant if
 159              * target is not a guest).
 160              */
 161             if (!pcmk__is_bundled(rsc)) {
 162                 order_actions(stonith_op, action, pcmk__ar_guest_allowed);
 163             }
 164             order_actions(stonith_op, parent_stop, pcmk__ar_guest_allowed);
 165         }
 166 
 167         if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 168             crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
 169                        rsc->id, (order_implicit? "after" : "because"),
 170                        pcmk__node_name(target));
 171         } else {
 172             crm_info("%s is implicit %s %s is fenced",
 173                      action->uuid, (order_implicit? "after" : "because"),
 174                      pcmk__node_name(target));
 175         }
 176 
 177         if (pcmk_is_set(rsc->flags, pcmk_rsc_notify)) {
 178             pe__order_notifs_after_fencing(action, rsc, stonith_op);
 179         }
 180 
 181 #if 0
 182         /* It might be a good idea to stop healthy resources on a node about to
 183          * be fenced, when possible.
 184          *
 185          * However, fencing must be done before a failed resource's
 186          * (pseudo-)stop action, so that could create a loop. For example, given
 187          * a group of A and B running on node N with a failed stop of B:
 188          *
 189          *    fence N -> stop B (pseudo-op) -> stop A -> fence N
 190          *
 191          * The block below creates the stop A -> fence N ordering and therefore
 192          * must (at least for now) be disabled. Instead, run the block above and
 193          * treat all resources on N as B would be (i.e., as a pseudo-op after
 194          * the fencing).
 195          *
 196          * @TODO Maybe break the "A requires B" dependency in
 197          * pcmk__update_action_for_orderings() and use this block for healthy
 198          * resources instead of the above.
 199          */
 200          crm_info("Moving healthy resource %s off %s before fencing",
 201                   rsc->id, pcmk__node_name(node));
 202          pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
 203                             strdup(PCMK_ACTION_STONITH), stonith_op,
 204                             pcmk__ar_ordered, rsc->cluster);
 205 #endif
 206     }
 207 
 208     g_list_free(action_list);
 209 
 210     /* Get a list of demote actions potentially implied by the fencing */
 211     action_list = pe__resource_actions(rsc, target, PCMK_ACTION_DEMOTE, FALSE);
 212 
 213     for (iter = action_list; iter != NULL; iter = iter->next) {
 214         pcmk_action_t *action = iter->data;
 215 
 216         if (!(action->node->details->online) || action->node->details->unclean
 217             || pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 218 
 219             if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 220                 pcmk__rsc_info(rsc,
 221                                "Demote of failed resource %s is implicit "
 222                                "after %s is fenced",
 223                                rsc->id, pcmk__node_name(target));
 224             } else {
 225                 pcmk__rsc_info(rsc, "%s is implicit after %s is fenced",
 226                                action->uuid, pcmk__node_name(target));
 227             }
 228 
 229             /* The demote would never complete and is now implied by the
 230              * fencing, so convert it into a pseudo-action.
 231              */
 232             pcmk__set_action_flags(action,
 233                                    pcmk_action_pseudo|pcmk_action_runnable);
 234 
 235             if (pcmk__is_bundled(rsc)) {
 236                 // Recovery will be ordered as usual after parent's implied stop
 237 
 238             } else if (order_implicit) {
 239                 order_actions(stonith_op, action,
 240                               pcmk__ar_guest_allowed|pcmk__ar_ordered);
 241             }
 242         }
 243     }
 244 
 245     g_list_free(action_list);
 246 }
 247 
 248 /*!
 249  * \internal
 250  * \brief Order resource actions properly relative to fencing
 251  *
 252  * \param[in,out] rsc         Resource whose actions should be ordered
 253  * \param[in,out] stonith_op  Fencing operation to be ordered against
 254  */
 255 static void
 256 rsc_stonith_ordering(pcmk_resource_t *rsc, pcmk_action_t *stonith_op)
     /* [previous][next][first][last][top][bottom][index][help] */
 257 {
 258     if (rsc->children) {
 259         for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 260             pcmk_resource_t *child_rsc = iter->data;
 261 
 262             rsc_stonith_ordering(child_rsc, stonith_op);
 263         }
 264 
 265     } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 266         pcmk__rsc_trace(rsc,
 267                         "Skipping fencing constraints for unmanaged resource: "
 268                         "%s", rsc->id);
 269 
 270     } else {
 271         order_start_vs_fencing(rsc, stonith_op);
 272         order_stop_vs_fencing(rsc, stonith_op);
 273     }
 274 }
 275 
 276 /*!
 277  * \internal
 278  * \brief Order all actions appropriately relative to a fencing operation
 279  *
 280  * Ensure start operations of affected resources are ordered after fencing,
 281  * imply stop and demote operations of affected resources by marking them as
 282  * pseudo-actions, etc.
 283  *
 284  * \param[in,out] stonith_op  Fencing operation
 285  * \param[in,out] scheduler   Scheduler data
 286  */
 287 void
 288 pcmk__order_vs_fence(pcmk_action_t *stonith_op, pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 289 {
 290     CRM_CHECK(stonith_op && scheduler, return);
 291     for (GList *r = scheduler->resources; r != NULL; r = r->next) {
 292         rsc_stonith_ordering((pcmk_resource_t *) r->data, stonith_op);
 293     }
 294 }
 295 
 296 /*!
 297  * \internal
 298  * \brief Order an action after unfencing
 299  *
 300  * \param[in]     rsc       Resource that action is for
 301  * \param[in,out] node      Node that action is on
 302  * \param[in,out] action    Action to be ordered after unfencing
 303  * \param[in]     order     Ordering flags
 304  */
 305 void
 306 pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
 307                        pcmk_action_t *action,
 308                        enum pcmk__action_relation_flags order)
 309 {
 310     /* When unfencing is in use, we order unfence actions before any probe or
 311      * start of resources that require unfencing, and also of fence devices.
 312      *
 313      * This might seem to violate the principle that fence devices require
 314      * only quorum. However, fence agents that unfence often don't have enough
 315      * information to even probe or start unless the node is first unfenced.
 316      */
 317     if ((pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
 318          && pcmk_is_set(rsc->cluster->flags, pcmk_sched_enable_unfencing))
 319         || pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
 320 
 321         /* Start with an optional ordering. Requiring unfencing would result in
 322          * the node being unfenced, and all its resources being stopped,
 323          * whenever a new resource is added -- which would be highly suboptimal.
 324          */
 325         pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, TRUE, NULL,
 326                                            FALSE, node->details->data_set);
 327 
 328         order_actions(unfence, action, order);
 329 
 330         if (!pcmk__node_unfenced(node)) {
 331             // But unfencing is required if it has never been done
 332             char *reason = crm_strdup_printf("required by %s %s",
 333                                              rsc->id, action->task);
 334 
 335             trigger_unfencing(NULL, node, reason, NULL,
 336                               node->details->data_set);
 337             free(reason);
 338         }
 339     }
 340 }
 341 
 342 /*!
 343  * \internal
 344  * \brief Create pseudo-op for guest node fence, and order relative to it
 345  *
 346  * \param[in,out] node  Guest node to fence
 347  */
 348 void
 349 pcmk__fence_guest(pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 350 {
 351     pcmk_resource_t *container = NULL;
 352     pcmk_action_t *stop = NULL;
 353     pcmk_action_t *stonith_op = NULL;
 354 
 355     /* The fence action is just a label; we don't do anything differently for
 356      * off vs. reboot. We specify it explicitly, rather than let it default to
 357      * cluster's default action, because we are not _initiating_ fencing -- we
 358      * are creating a pseudo-event to describe fencing that is already occurring
 359      * by other means (container recovery).
 360      */
 361     const char *fence_action = PCMK_ACTION_OFF;
 362 
 363     pcmk__assert(node != NULL);
 364 
 365     /* Check whether guest's container resource has any explicit stop or
 366      * start (the stop may be implied by fencing of the guest's host).
 367      */
 368     container = node->details->remote_rsc->container;
 369     if (container) {
 370         stop = find_first_action(container->actions, NULL, PCMK_ACTION_STOP,
 371                                  NULL);
 372 
 373         if (find_first_action(container->actions, NULL, PCMK_ACTION_START,
 374                               NULL)) {
 375             fence_action = PCMK_ACTION_REBOOT;
 376         }
 377     }
 378 
 379     /* Create a fence pseudo-event, so we have an event to order actions
 380      * against, and the controller can always detect it.
 381      */
 382     stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
 383                              FALSE, node->details->data_set);
 384     pcmk__set_action_flags(stonith_op, pcmk_action_pseudo|pcmk_action_runnable);
 385 
 386     /* We want to imply stops/demotes after the guest is stopped, not wait until
 387      * it is restarted, so we always order pseudo-fencing after stop, not start
 388      * (even though start might be closer to what is done for a real reboot).
 389      */
 390     if ((stop != NULL) && pcmk_is_set(stop->flags, pcmk_action_pseudo)) {
 391         pcmk_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
 392                                                      NULL, FALSE,
 393                                                      node->details->data_set);
 394 
 395         crm_info("Implying guest %s is down (action %d) after %s fencing",
 396                  pcmk__node_name(node), stonith_op->id,
 397                  pcmk__node_name(stop->node));
 398         order_actions(parent_stonith_op, stonith_op,
 399                       pcmk__ar_unrunnable_first_blocks
 400                       |pcmk__ar_first_implies_then);
 401 
 402     } else if (stop) {
 403         order_actions(stop, stonith_op,
 404                       pcmk__ar_unrunnable_first_blocks
 405                       |pcmk__ar_first_implies_then);
 406         crm_info("Implying guest %s is down (action %d) "
 407                  "after container %s is stopped (action %d)",
 408                  pcmk__node_name(node), stonith_op->id,
 409                  container->id, stop->id);
 410     } else {
 411         /* If we're fencing the guest node but there's no stop for the guest
 412          * resource, we must think the guest is already stopped. However, we may
 413          * think so because its resource history was just cleaned. To avoid
 414          * unnecessarily considering the guest node down if it's really up,
 415          * order the pseudo-fencing after any stop of the connection resource,
 416          * which will be ordered after any container (re-)probe.
 417          */
 418         stop = find_first_action(node->details->remote_rsc->actions, NULL,
 419                                  PCMK_ACTION_STOP, NULL);
 420 
 421         if (stop) {
 422             order_actions(stop, stonith_op, pcmk__ar_ordered);
 423             crm_info("Implying guest %s is down (action %d) "
 424                      "after connection is stopped (action %d)",
 425                      pcmk__node_name(node), stonith_op->id, stop->id);
 426         } else {
 427             /* Not sure why we're fencing, but everything must already be
 428              * cleanly stopped.
 429              */
 430             crm_info("Implying guest %s is down (action %d) ",
 431                      pcmk__node_name(node), stonith_op->id);
 432         }
 433     }
 434 
 435     // Order/imply other actions relative to pseudo-fence as with real fence
 436     pcmk__order_vs_fence(stonith_op, node->details->data_set);
 437 }
 438 
 439 /*!
 440  * \internal
 441  * \brief Check whether node has already been unfenced
 442  *
 443  * \param[in] node  Node to check
 444  *
 445  * \return true if node has a nonzero #node-unfenced attribute (or none),
 446  *         otherwise false
 447  */
 448 bool
 449 pcmk__node_unfenced(const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 450 {
 451     const char *unfenced = pcmk__node_attr(node, CRM_ATTR_UNFENCED, NULL,
 452                                            pcmk__rsc_node_current);
 453 
 454     return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
 455 }
 456 
 457 /*!
 458  * \internal
 459  * \brief Order a resource's start and stop relative to unfencing of a node
 460  *
 461  * \param[in,out] data       Node that could be unfenced
 462  * \param[in,out] user_data  Resource to order
 463  */
 464 void
 465 pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 466 {
 467     pcmk_node_t *node = (pcmk_node_t *) data;
 468     pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
 469 
 470     pcmk_action_t *unfence = pe_fence_op(node, PCMK_ACTION_ON, true, NULL,
 471                                          false, rsc->cluster);
 472 
 473     crm_debug("Ordering any stops of %s before %s, and any starts after",
 474               rsc->id, unfence->uuid);
 475 
 476     /*
 477      * It would be more efficient to order clone resources once,
 478      * rather than order each instance, but ordering the instance
 479      * allows us to avoid unnecessary dependencies that might conflict
 480      * with user constraints.
 481      *
 482      * @TODO: This constraint can still produce a transition loop if the
 483      * resource has a stop scheduled on the node being unfenced, and
 484      * there is a user ordering constraint to start some other resource
 485      * (which will be ordered after the unfence) before stopping this
 486      * resource. An example is "start some slow-starting cloned service
 487      * before stopping an associated virtual IP that may be moving to
 488      * it":
 489      *       stop this -> unfencing -> start that -> stop this
 490      */
 491     pcmk__new_ordering(rsc, stop_key(rsc), NULL,
 492                        NULL, strdup(unfence->uuid), unfence,
 493                        pcmk__ar_ordered|pcmk__ar_if_on_same_node,
 494                        rsc->cluster);
 495 
 496     pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
 497                        rsc, start_key(rsc), NULL,
 498                        pcmk__ar_first_implies_same_node_then
 499                        |pcmk__ar_if_on_same_node,
 500                        rsc->cluster);
 501 }

/* [previous][next][first][last][top][bottom][index][help] */