root/lib/pacemaker/pcmk_sched_probes.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. add_expected_result
  2. pcmk__probe_resource_list
  3. probe_then_start
  4. guest_resource_will_stop
  5. probe_action
  6. pcmk__probe_rsc_on_node
  7. probe_needed_before_action
  8. add_probe_orderings_for_stops
  9. add_start_orderings_for_probe
  10. add_restart_orderings_for_probe
  11. clear_actions_tracking_flag
  12. add_start_restart_orderings_for_rsc
  13. order_then_probes
  14. pcmk__order_probes
  15. pcmk__schedule_probes

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 #include <crm/crm.h>
  15 #include <crm/pengine/status.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Add the expected result to a newly created probe
  22  *
  23  * \param[in,out] probe  Probe action to add expected result to
  24  * \param[in]     rsc    Resource that probe is for
  25  * \param[in]     node   Node that probe will run on
  26  */
  27 static void
  28 add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
  29                     const pcmk_node_t *node)
  30 {
  31     // Check whether resource is currently active on node
  32     pcmk_node_t *running = pe_find_node_id(rsc->priv->active_nodes,
  33                                            node->priv->id);
  34 
  35     // The expected result is what we think the resource's current state is
  36     if (running == NULL) {
  37         pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
  38 
  39     } else if (rsc->priv->orig_role == pcmk_role_promoted) {
  40         pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
  41     }
  42 }
  43 
  44 /*!
  45  * \internal
  46  * \brief Create any needed robes on a node for a list of resources
  47  *
  48  * \param[in,out] rscs  List of resources to create probes for
  49  * \param[in,out] node  Node to create probes on
  50  *
  51  * \return true if any probe was created, otherwise false
  52  */
  53 bool
  54 pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  55 {
  56     bool any_created = false;
  57 
  58     for (GList *iter = rscs; iter != NULL; iter = iter->next) {
  59         pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
  60 
  61         if (rsc->priv->cmds->create_probe(rsc, node)) {
  62             any_created = true;
  63         }
  64     }
  65     return any_created;
  66 }
  67 
  68 /*!
  69  * \internal
  70  * \brief Order one resource's start after another's start-up probe
  71  *
  72  * \param[in,out] rsc1  Resource that might get start-up probe
  73  * \param[in]     rsc2  Resource that might be started
  74  */
  75 static void
  76 probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
     /* [previous][next][first][last][top][bottom][index][help] */
  77 {
  78     const pcmk_node_t *rsc1_node = rsc1->priv->assigned_node;
  79 
  80     if ((rsc1_node != NULL)
  81         && (g_hash_table_lookup(rsc1->priv->probed_nodes,
  82                                 rsc1_node->priv->id) == NULL)) {
  83 
  84         pcmk__new_ordering(rsc1,
  85                            pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0),
  86                            NULL,
  87                            rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0),
  88                            NULL,
  89                            pcmk__ar_ordered, rsc1->priv->scheduler);
  90     }
  91 }
  92 
  93 /*!
  94  * \internal
  95  * \brief Check whether a guest resource will stop
  96  *
  97  * \param[in] node  Guest node to check
  98  *
  99  * \return true if guest resource will likely stop, otherwise false
 100  */
 101 static bool
 102 guest_resource_will_stop(const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 103 {
 104     const pcmk_resource_t *guest_rsc = NULL;
 105     const pcmk_node_t *guest_node = NULL;
 106 
 107     guest_rsc = node->priv->remote->priv->launcher;
 108     guest_node = guest_rsc->priv->assigned_node;
 109 
 110     /* Ideally, we'd check whether the guest has a required stop, but that
 111      * information doesn't exist yet, so approximate it ...
 112      */
 113     return pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)
 114            || node->details->unclean
 115            || pcmk_is_set(guest_rsc->flags, pcmk__rsc_failed)
 116            || (guest_rsc->priv->next_role == pcmk_role_stopped)
 117 
 118            // Guest is moving
 119            || ((guest_rsc->priv->orig_role > pcmk_role_stopped)
 120                && (guest_node != NULL)
 121                && pcmk__find_node_in_list(guest_rsc->priv->active_nodes,
 122                                           guest_node->priv->name) == NULL);
 123 }
 124 
 125 /*!
 126  * \internal
 127  * \brief Create a probe action for a resource on a node
 128  *
 129  * \param[in,out] rsc   Resource to create probe for
 130  * \param[in,out] node  Node to create probe on
 131  *
 132  * \return Newly created probe action
 133  */
 134 static pcmk_action_t *
 135 probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 136 {
 137     pcmk_action_t *probe = NULL;
 138     char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
 139 
 140     crm_debug("Scheduling probe of %s %s on %s",
 141               pcmk_role_text(rsc->priv->orig_role), rsc->id,
 142               pcmk__node_name(node));
 143 
 144     probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
 145                           rsc->priv->scheduler);
 146     pcmk__clear_action_flags(probe, pcmk__action_optional);
 147 
 148     pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
 149     add_expected_result(probe, rsc, node);
 150     return probe;
 151 }
 152 
 153 /*!
 154  * \internal
 155  * \brief Create probes for a resource on a node, if needed
 156  *
 157  * \brief Schedule any probes needed for a resource on a node
 158  *
 159  * \param[in,out] rsc   Resource to create probe for
 160  * \param[in,out] node  Node to create probe on
 161  *
 162  * \return true if any probe was created, otherwise false
 163  */
 164 bool
 165 pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 166 {
 167     uint32_t flags = pcmk__ar_ordered;
 168     pcmk_action_t *probe = NULL;
 169     pcmk_node_t *allowed = NULL;
 170     pcmk_resource_t *top = uber_parent(rsc);
 171     const char *reason = NULL;
 172 
 173     pcmk__assert((rsc != NULL) && (node != NULL));
 174 
 175     if (!pcmk_is_set(rsc->priv->scheduler->flags,
 176                      pcmk__sched_probe_resources)) {
 177         reason = "start-up probes are disabled";
 178         goto no_probe;
 179     }
 180 
 181     if (pcmk__is_pacemaker_remote_node(node)) {
 182         if (pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) {
 183             reason = "Pacemaker Remote nodes cannot run stonith agents";
 184             goto no_probe;
 185 
 186         } else if (pcmk__is_guest_or_bundle_node(node)
 187                    && pe__resource_contains_guest_node(rsc->priv->scheduler,
 188                                                        rsc)) {
 189             reason = "guest nodes cannot run resources containing guest nodes";
 190             goto no_probe;
 191 
 192         } else if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
 193             reason = "Pacemaker Remote nodes cannot host remote connections";
 194             goto no_probe;
 195         }
 196     }
 197 
 198     // If this is a collective resource, probes are created for its children
 199     if (rsc->priv->children != NULL) {
 200         return pcmk__probe_resource_list(rsc->priv->children, node);
 201     }
 202 
 203     if ((rsc->priv->launcher != NULL)
 204         && !pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) {
 205         reason = "resource is inside a container";
 206         goto no_probe;
 207 
 208     } else if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
 209         reason = "resource is orphaned";
 210         goto no_probe;
 211 
 212     } else if (g_hash_table_lookup(rsc->priv->probed_nodes,
 213                                    node->priv->id) != NULL) {
 214         reason = "resource state is already known";
 215         goto no_probe;
 216     }
 217 
 218     allowed = g_hash_table_lookup(rsc->priv->allowed_nodes,
 219                                   node->priv->id);
 220 
 221     if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)
 222         || pcmk_is_set(top->flags, pcmk__rsc_exclusive_probes)) {
 223         // Exclusive discovery is enabled ...
 224 
 225         if (allowed == NULL) {
 226             // ... but this node is not allowed to run the resource
 227             reason = "resource has exclusive discovery but is not allowed "
 228                      "on node";
 229             goto no_probe;
 230 
 231         } else if (allowed->assign->probe_mode != pcmk__probe_exclusive) {
 232             // ... but no constraint marks this node for discovery of resource
 233             reason = "resource has exclusive discovery but is not enabled "
 234                      "on node";
 235             goto no_probe;
 236         }
 237     }
 238 
 239     if (allowed == NULL) {
 240         allowed = node;
 241     }
 242     if (allowed->assign->probe_mode == pcmk__probe_never) {
 243         reason = "node has discovery disabled";
 244         goto no_probe;
 245     }
 246 
 247     if (pcmk__is_guest_or_bundle_node(node)) {
 248         pcmk_resource_t *guest = node->priv->remote->priv->launcher;
 249 
 250         if (guest->priv->orig_role == pcmk_role_stopped) {
 251             // The guest is stopped, so we know no resource is active there
 252             reason = "node's guest is stopped";
 253             probe_then_start(guest, top);
 254             goto no_probe;
 255 
 256         } else if (guest_resource_will_stop(node)) {
 257             reason = "node's guest will stop";
 258 
 259             // Order resource start after guest stop (in case it's restarting)
 260             pcmk__new_ordering(guest,
 261                                pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
 262                                NULL, top,
 263                                pcmk__op_key(top->id, PCMK_ACTION_START, 0),
 264                                NULL, pcmk__ar_ordered, rsc->priv->scheduler);
 265             goto no_probe;
 266         }
 267     }
 268 
 269     // We've eliminated all cases where a probe is not needed, so now it is
 270     probe = probe_action(rsc, node);
 271 
 272     /* Below, we will order the probe relative to start or reload. If this is a
 273      * clone instance, the start or reload is for the entire clone rather than
 274      * just the instance. Otherwise, the start or reload is for the resource
 275      * itself.
 276      */
 277     if (!pcmk__is_clone(top)) {
 278         top = rsc;
 279     }
 280 
 281     /* Prevent a start if the resource can't be probed, but don't cause the
 282      * resource or entire clone to stop if already active.
 283      */
 284     if (!pcmk_is_set(probe->flags, pcmk__action_runnable)
 285         && (top->priv->active_nodes == NULL)) {
 286         pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks);
 287     }
 288 
 289     // Start or reload after probing the resource
 290     pcmk__new_ordering(rsc, NULL, probe,
 291                        top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
 292                        flags, rsc->priv->scheduler);
 293     pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
 294                        pcmk__ar_ordered, rsc->priv->scheduler);
 295 
 296     return true;
 297 
 298 no_probe:
 299     pcmk__rsc_trace(rsc,
 300                     "Skipping probe for %s on %s because %s",
 301                     rsc->id, node->priv->id, reason);
 302     return false;
 303 }
 304 
 305 /*!
 306  * \internal
 307  * \brief Check whether a probe should be ordered before another action
 308  *
 309  * \param[in] probe  Probe action to check
 310  * \param[in] then   Other action to check
 311  *
 312  * \return true if \p probe should be ordered before \p then, otherwise false
 313  */
 314 static bool
 315 probe_needed_before_action(const pcmk_action_t *probe,
     /* [previous][next][first][last][top][bottom][index][help] */
 316                            const pcmk_action_t *then)
 317 {
 318     // Probes on a node are performed after unfencing it, not before
 319     if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
 320         && pcmk__same_node(probe->node, then->node)) {
 321         const char *op = g_hash_table_lookup(then->meta,
 322                                              PCMK__META_STONITH_ACTION);
 323 
 324         if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
 325             return false;
 326         }
 327     }
 328 
 329     // Probes should be done on a node before shutting it down
 330     if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
 331         && (probe->node != NULL) && (then->node != NULL)
 332         && !pcmk__same_node(probe->node, then->node)) {
 333         return false;
 334     }
 335 
 336     // Otherwise probes should always be done before any other action
 337     return true;
 338 }
 339 
 340 /*!
 341  * \internal
 342  * \brief Add implicit "probe then X" orderings for "stop then X" orderings
 343  *
 344  * If the state of a resource is not known yet, a probe will be scheduled,
 345  * expecting a "not running" result. If the probe fails, a stop will not be
 346  * scheduled until the next transition. Thus, if there are ordering constraints
 347  * like "stop this resource then do something else that's not for the same
 348  * resource", add implicit "probe this resource then do something" equivalents
 349  * so the relation is upheld until we know whether a stop is needed.
 350  *
 351  * \param[in,out] scheduler  Scheduler data
 352  */
 353 static void
 354 add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 355 {
 356     for (GList *iter = scheduler->priv->ordering_constraints;
 357          iter != NULL; iter = iter->next) {
 358 
 359         pcmk__action_relation_t *order = iter->data;
 360         uint32_t order_flags = pcmk__ar_ordered;
 361         GList *probes = NULL;
 362         GList *then_actions = NULL;
 363         pcmk_action_t *first = NULL;
 364         pcmk_action_t *then = NULL;
 365 
 366         // Skip disabled orderings
 367         if (order->flags == pcmk__ar_none) {
 368             continue;
 369         }
 370 
 371         // Skip non-resource orderings, and orderings for the same resource
 372         if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) {
 373             continue;
 374         }
 375 
 376         // Skip invalid orderings (shouldn't be possible)
 377         first = order->action1;
 378         then = order->action2;
 379         if (((first == NULL) && (order->task1 == NULL))
 380             || ((then == NULL) && (order->task2 == NULL))) {
 381             continue;
 382         }
 383 
 384         // Skip orderings for first actions other than stop
 385         if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
 386                                              pcmk__str_none)) {
 387             continue;
 388         } else if ((first == NULL)
 389                    && !pcmk__ends_with(order->task1,
 390                                        "_" PCMK_ACTION_STOP "_0")) {
 391             continue;
 392         }
 393 
 394         /* Do not imply a probe ordering for a resource inside of a stopping
 395          * launcher. Otherwise, it might introduce a transition loop, since a
 396          * probe could be scheduled after the launcher starts again.
 397          */
 398         if ((order->rsc2 != NULL)
 399             && (order->rsc1->priv->launcher == order->rsc2)) {
 400 
 401             if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
 402                                                pcmk__str_none)) {
 403                 continue;
 404             } else if ((then == NULL)
 405                        && pcmk__ends_with(order->task2,
 406                                           "_" PCMK_ACTION_STOP "_0")) {
 407                 continue;
 408             }
 409         }
 410 
 411         // Preserve certain order options for future filtering
 412         if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) {
 413             pcmk__set_relation_flags(order_flags,
 414                                      pcmk__ar_if_first_unmigratable);
 415         }
 416         if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) {
 417             pcmk__set_relation_flags(order_flags, pcmk__ar_if_on_same_node);
 418         }
 419 
 420         // Preserve certain order types for future filtering
 421         if ((order->flags == pcmk__ar_if_required_on_same_node)
 422             || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
 423             order_flags = order->flags;
 424         }
 425 
 426         // List all scheduled probes for the first resource
 427         probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR,
 428                                       FALSE);
 429         if (probes == NULL) { // There aren't any
 430             continue;
 431         }
 432 
 433         // List all relevant "then" actions
 434         if (then != NULL) {
 435             then_actions = g_list_prepend(NULL, then);
 436 
 437         } else if (order->rsc2 != NULL) {
 438             then_actions = find_actions(order->rsc2->priv->actions,
 439                                         order->task2, NULL);
 440             if (then_actions == NULL) { // There aren't any
 441                 g_list_free(probes);
 442                 continue;
 443             }
 444         }
 445 
 446         crm_trace("Implying 'probe then' orderings for '%s then %s' "
 447                   "(id=%d, type=%.6x)",
 448                   ((first == NULL)? order->task1 : first->uuid),
 449                   ((then == NULL)? order->task2 : then->uuid),
 450                   order->id, order->flags);
 451 
 452         for (GList *probe_iter = probes; probe_iter != NULL;
 453              probe_iter = probe_iter->next) {
 454 
 455             pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
 456 
 457             for (GList *then_iter = then_actions; then_iter != NULL;
 458                  then_iter = then_iter->next) {
 459 
 460                 pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
 461 
 462                 if (probe_needed_before_action(probe, then)) {
 463                     order_actions(probe, then, order_flags);
 464                 }
 465             }
 466         }
 467 
 468         g_list_free(then_actions);
 469         g_list_free(probes);
 470     }
 471 }
 472 
 473 /*!
 474  * \internal
 475  * \brief Add necessary orderings between probe and starts of clone instances
 476  *
 477  * , in additon to the ordering with the parent resource added upon creating
 478  * the probe.
 479  *
 480  * \param[in,out] probe     Probe as 'first' action in an ordering
 481  * \param[in,out] after     'then' action wrapper in the ordering
 482  */
 483 static void
 484 add_start_orderings_for_probe(pcmk_action_t *probe,
     /* [previous][next][first][last][top][bottom][index][help] */
 485                               pcmk__related_action_t *after)
 486 {
 487     uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks;
 488 
 489     /* Although the ordering between the probe of the clone instance and the
 490      * start of its parent has been added in pcmk__probe_rsc_on_node(), we
 491      * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
 492      * as long as any of the clone instances are running to prevent them from
 493      * being unexpectedly stopped.
 494      *
 495      * On the other hand, we still need to prevent any inactive instances from
 496      * starting unless the probe is runnable so that we don't risk starting too
 497      * many instances before we know the state on all nodes.
 498      */
 499     if ((after->action->rsc->priv->variant <= pcmk__rsc_variant_group)
 500         || pcmk_is_set(probe->flags, pcmk__action_runnable)
 501         // The order type is already enforced for its parent.
 502         || pcmk_is_set(after->flags, pcmk__ar_unrunnable_first_blocks)
 503         || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
 504         || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
 505                          pcmk__str_none)) {
 506         return;
 507     }
 508 
 509     crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
 510               "then instances of %s@%s'",
 511               probe->uuid, pcmk__node_name(probe->node),
 512               after->action->uuid, pcmk__node_name(after->action->node));
 513 
 514     for (GList *then_iter = after->action->actions_after; then_iter != NULL;
 515          then_iter = then_iter->next) {
 516 
 517         pcmk__related_action_t *then = then_iter->data;
 518 
 519         if ((then->action->rsc->priv->active_nodes != NULL)
 520             || (pe__const_top_resource(then->action->rsc, false)
 521                 != after->action->rsc)
 522             || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
 523                              pcmk__str_none)) {
 524             continue;
 525         }
 526 
 527         crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
 528                   "then %s@%s' (type=%#.6x)",
 529                   probe->uuid, pcmk__node_name(probe->node),
 530                   then->action->uuid, pcmk__node_name(then->action->node),
 531                   flags);
 532 
 533         /* Prevent the instance from starting if the instance can't, but don't
 534          * cause any other intances to stop if already active.
 535          */
 536         order_actions(probe, then->action, flags);
 537     }
 538 
 539     return;
 540 }
 541 
 542 /*!
 543  * \internal
 544  * \brief Order probes before restarts and re-promotes
 545  *
 546  * If a given ordering is a "probe then start" or "probe then promote" ordering,
 547  * add an implicit "probe then stop/demote" ordering in case the action is part
 548  * of a restart/re-promote, and do the same recursively for all actions ordered
 549  * after the "then" action.
 550  *
 551  * \param[in,out] probe     Probe as 'first' action in an ordering
 552  * \param[in,out] after     'then' action in the ordering
 553  */
 554 static void
 555 add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
     /* [previous][next][first][last][top][bottom][index][help] */
 556 {
 557     GList *iter = NULL;
 558     bool interleave = false;
 559     pcmk_resource_t *compatible_rsc = NULL;
 560 
 561     // Validate that this is a resource probe followed by some action
 562     if ((after == NULL) || (probe == NULL) || !pcmk__is_primitive(probe->rsc)
 563         || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
 564         return;
 565     }
 566 
 567     // Avoid running into any possible loop
 568     if (pcmk_is_set(after->flags, pcmk__action_detect_loop)) {
 569         return;
 570     }
 571     pcmk__set_action_flags(after, pcmk__action_detect_loop);
 572 
 573     crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
 574               probe->uuid, pcmk__node_name(probe->node),
 575               after->uuid, pcmk__node_name(after->node));
 576 
 577     /* Add restart orderings if "then" is for a different primitive.
 578      * Orderings for collective resources will be added later.
 579      */
 580     if (pcmk__is_primitive(after->rsc) && (probe->rsc != after->rsc)) {
 581 
 582             GList *then_actions = NULL;
 583 
 584             if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
 585                 then_actions = pe__resource_actions(after->rsc, NULL,
 586                                                     PCMK_ACTION_STOP, FALSE);
 587 
 588             } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
 589                                     pcmk__str_none)) {
 590                 then_actions = pe__resource_actions(after->rsc, NULL,
 591                                                     PCMK_ACTION_DEMOTE, FALSE);
 592             }
 593 
 594             for (iter = then_actions; iter != NULL; iter = iter->next) {
 595                 pcmk_action_t *then = (pcmk_action_t *) iter->data;
 596 
 597                 // Skip pseudo-actions (for example, those implied by fencing)
 598                 if (!pcmk_is_set(then->flags, pcmk__action_pseudo)) {
 599                     order_actions(probe, then, pcmk__ar_ordered);
 600                 }
 601             }
 602             g_list_free(then_actions);
 603     }
 604 
 605     /* Detect whether "then" is an interleaved clone action. For these, we want
 606      * to add orderings only for the relevant instance.
 607      */
 608     if ((after->rsc != NULL)
 609         && (after->rsc->priv->variant > pcmk__rsc_variant_group)) {
 610 
 611         interleave = crm_is_true(g_hash_table_lookup(after->rsc->priv->meta,
 612                                                      PCMK_META_INTERLEAVE));
 613         if (interleave) {
 614             compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
 615                                                             after->rsc,
 616                                                             pcmk_role_unknown,
 617                                                             false);
 618         }
 619     }
 620 
 621     /* Now recursively do the same for all actions ordered after "then". This
 622      * also handles collective resources since the collective action will be
 623      * ordered before its individual instances' actions.
 624      */
 625     for (iter = after->actions_after; iter != NULL; iter = iter->next) {
 626         pcmk__related_action_t *after_wrapper = iter->data;
 627         const pcmk_resource_t *chained_rsc = NULL;
 628 
 629         /* pcmk__ar_first_implies_then is the reason why a required A.start
 630          * implies/enforces B.start to be required too, which is the cause of
 631          * B.restart/re-promote.
 632          *
 633          * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
 634          * only used for unfencing case, which tends to introduce transition
 635          * loops...
 636          */
 637         if (!pcmk_is_set(after_wrapper->flags, pcmk__ar_first_implies_then)) {
 638             /* The order type between a group/clone and its child such as
 639              * B.start-> B_child.start is:
 640              * pcmk__ar_then_implies_first_graphed
 641              * |pcmk__ar_unrunnable_first_blocks
 642              *
 643              * Proceed through the ordering chain and build dependencies with
 644              * its children.
 645              */
 646             if ((after->rsc == NULL)
 647                 || (after->rsc->priv->variant < pcmk__rsc_variant_group)
 648                 || (probe->rsc->priv->parent == after->rsc)
 649                 || (after_wrapper->action->rsc == NULL)) {
 650                 continue;
 651             }
 652             chained_rsc = after_wrapper->action->rsc;
 653 
 654             if ((chained_rsc->priv->variant > pcmk__rsc_variant_group)
 655                 || (after->rsc != chained_rsc->priv->parent)) {
 656                 continue;
 657             }
 658 
 659             /* Proceed to the children of a group or a non-interleaved clone.
 660              * For an interleaved clone, proceed only to the relevant child.
 661              */
 662             if ((after->rsc->priv->variant > pcmk__rsc_variant_group)
 663                 && interleave
 664                 && ((compatible_rsc == NULL)
 665                     || (compatible_rsc != chained_rsc))) {
 666                 continue;
 667             }
 668         }
 669 
 670         crm_trace("Recursively adding probe restart orderings for "
 671                   "'%s@%s then %s@%s' (type=%#.6x)",
 672                   after->uuid, pcmk__node_name(after->node),
 673                   after_wrapper->action->uuid,
 674                   pcmk__node_name(after_wrapper->action->node),
 675                   after_wrapper->flags);
 676 
 677         add_restart_orderings_for_probe(probe, after_wrapper->action);
 678     }
 679 }
 680 
 681 /*!
 682  * \internal
 683  * \brief Clear the tracking flag on all scheduled actions
 684  *
 685  * \param[in,out] scheduler  Scheduler data
 686  */
 687 static void
 688 clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 689 {
 690     for (GList *iter = scheduler->priv->actions;
 691          iter != NULL; iter = iter->next) {
 692         pcmk_action_t *action = iter->data;
 693 
 694         pcmk__clear_action_flags(action, pcmk__action_detect_loop);
 695     }
 696 }
 697 
 698 /*!
 699  * \internal
 700  * \brief Add start and restart orderings for probes scheduled for a resource
 701  *
 702  * \param[in,out] data       Resource whose probes should be ordered
 703  * \param[in]     user_data  Unused
 704  */
 705 static void
 706 add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 707 {
 708     pcmk_resource_t *rsc = data;
 709     GList *probes = NULL;
 710 
 711     // For collective resources, order each instance recursively
 712     if (!pcmk__is_primitive(rsc)) {
 713         g_list_foreach(rsc->priv->children,
 714                        add_start_restart_orderings_for_rsc, NULL);
 715         return;
 716     }
 717 
 718     // Find all probes for given resource
 719     probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
 720 
 721     // Add probe restart orderings for each probe found
 722     for (GList *iter = probes; iter != NULL; iter = iter->next) {
 723         pcmk_action_t *probe = (pcmk_action_t *) iter->data;
 724 
 725         for (GList *then_iter = probe->actions_after; then_iter != NULL;
 726              then_iter = then_iter->next) {
 727 
 728             pcmk__related_action_t *then = then_iter->data;
 729 
 730             add_start_orderings_for_probe(probe, then);
 731             add_restart_orderings_for_probe(probe, then->action);
 732             clear_actions_tracking_flag(rsc->priv->scheduler);
 733         }
 734     }
 735 
 736     g_list_free(probes);
 737 }
 738 
 739 /*!
 740  * \internal
 741  * \brief Add "A then probe B" orderings for "A then B" orderings
 742  *
 743  * \param[in,out] scheduler  Scheduler data
 744  *
 745  * \note This function is currently disabled (see next comment).
 746  */
 747 static void
 748 order_then_probes(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 749 {
 750 #if 0
 751     /* Given an ordering "A then B", we would prefer to wait for A to be started
 752      * before probing B.
 753      *
 754      * For example, if A is a filesystem which B can't even run without, it
 755      * would be helpful if the author of B's agent could assume that A is
 756      * running before B.monitor will be called.
 757      *
 758      * However, we can't _only_ probe after A is running, otherwise we wouldn't
 759      * detect the state of B if A could not be started. We can't even do an
 760      * opportunistic version of this, because B may be moving:
 761      *
 762      *   A.stop -> A.start -> B.probe -> B.stop -> B.start
 763      *
 764      * and if we add B.stop -> A.stop here, we get a loop:
 765      *
 766      *   A.stop -> A.start -> B.probe -> B.stop -> A.stop
 767      *
 768      * We could kill the "B.probe -> B.stop" dependency, but that could mean
 769      * stopping B "too" soon, because B.start must wait for the probe, and
 770      * we don't want to stop B if we can't start it.
 771      *
 772      * We could add the ordering only if A is an anonymous clone with
 773      * clone-max == node-max (since we'll never be moving it). However, we could
 774      * still be stopping one instance at the same time as starting another.
 775      *
 776      * The complexity of checking for allowed conditions combined with the ever
 777      * narrowing use case suggests that this code should remain disabled until
 778      * someone gets smarter.
 779      */
 780     for (GList *iter = scheduler->priv->resources;
 781          iter != NULL; iter = iter->next) {
 782         pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
 783 
 784         pcmk_action_t *start = NULL;
 785         GList *actions = NULL;
 786         GList *probes = NULL;
 787 
 788         actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
 789 
 790         if (actions) {
 791             start = actions->data;
 792             g_list_free(actions);
 793         }
 794 
 795         if (start == NULL) {
 796             crm_debug("No start action for %s", rsc->id);
 797             continue;
 798         }
 799 
 800         probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
 801 
 802         for (actions = start->actions_before; actions != NULL;
 803              actions = actions->next) {
 804 
 805             pcmk__related_action_t *before = actions->data;
 806 
 807             pcmk_action_t *first = before->action;
 808             pcmk_resource_t *first_rsc = first->rsc;
 809 
 810             if (first->required_runnable_before) {
 811                 for (GList *clone_actions = first->actions_before;
 812                      clone_actions != NULL;
 813                      clone_actions = clone_actions->next) {
 814 
 815                     before = clone_actions->data;
 816 
 817                     crm_trace("Testing '%s then %s' for %s",
 818                               first->uuid, before->action->uuid, start->uuid);
 819 
 820                     pcmk__assert(before->action->rsc != NULL);
 821                     first_rsc = before->action->rsc;
 822                     break;
 823                 }
 824 
 825             } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
 826                                      pcmk__str_none)) {
 827                 crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
 828             }
 829 
 830             if (first_rsc == NULL) {
 831                 continue;
 832 
 833             } else if (pe__const_top_resource(first_rsc, false)
 834                        == pe__const_top_resource(start->rsc, false)) {
 835                 crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
 836                 continue;
 837 
 838             } else if (!pcmk__is_clone(pe__const_top_resource(first_rsc,
 839                                                               false))) {
 840                 crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
 841                 continue;
 842             }
 843 
 844             crm_debug("Applying %s before %s", first->uuid, start->uuid);
 845 
 846             for (GList *probe_iter = probes; probe_iter != NULL;
 847                  probe_iter = probe_iter->next) {
 848 
 849                 pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
 850 
 851                 crm_debug("Ordering %s before %s", first->uuid, probe->uuid);
 852                 order_actions(first, probe, pcmk__ar_ordered);
 853             }
 854         }
 855     }
 856 #endif
 857 }
 858 
 859 void
 860 pcmk__order_probes(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 861 {
 862     // Add orderings for "probe then X"
 863     g_list_foreach(scheduler->priv->resources,
 864                    add_start_restart_orderings_for_rsc, NULL);
 865     add_probe_orderings_for_stops(scheduler);
 866 
 867     order_then_probes(scheduler);
 868 }
 869 
 870 /*!
 871  * \internal
 872  * \brief Schedule any probes needed
 873  *
 874  * \param[in,out] scheduler  Scheduler data
 875  *
 876  * \note This may also schedule fencing of failed remote nodes.
 877  */
 878 void
 879 pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 880 {
 881     // Schedule probes on each node in the cluster as needed
 882     for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
 883         pcmk_node_t *node = (pcmk_node_t *) iter->data;
 884 
 885         if (!node->details->online) {   // Don't probe offline nodes
 886             if (pcmk__is_failed_remote_node(node)) {
 887                 pe_fence_node(scheduler, node,
 888                               "the connection is unrecoverable", FALSE);
 889             }
 890             continue;
 891         }
 892 
 893         if (node->details->unclean) {   // Don't probe nodes that need fencing
 894             continue;
 895         }
 896 
 897         if (!pcmk_is_set(node->priv->flags, pcmk__node_probes_allowed)) {
 898             // The user requested that probes not be done on this node
 899             continue;
 900         }
 901 
 902         // Probe each resource in the cluster on this node, as needed
 903         pcmk__probe_resource_list(scheduler->priv->resources, node);
 904     }
 905 }

/* [previous][next][first][last][top][bottom][index][help] */