root/lib/pacemaker/pcmk_sched_probes.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. add_expected_result
  2. pcmk__probe_resource_list
  3. probe_then_start
  4. guest_resource_will_stop
  5. probe_action
  6. pcmk__probe_rsc_on_node
  7. probe_needed_before_action
  8. add_probe_orderings_for_stops
  9. add_start_orderings_for_probe
  10. add_restart_orderings_for_probe
  11. clear_actions_tracking_flag
  12. add_start_restart_orderings_for_rsc
  13. order_then_probes
  14. pcmk__order_probes
  15. pcmk__schedule_probes

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 #include <crm/crm.h>
  15 #include <crm/pengine/status.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Add the expected result to a newly created probe
  22  *
  23  * \param[in,out] probe  Probe action to add expected result to
  24  * \param[in]     rsc    Resource that probe is for
  25  * \param[in]     node   Node that probe will run on
  26  */
  27 static void
  28 add_expected_result(pcmk_action_t *probe, const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
  29                     const pcmk_node_t *node)
  30 {
  31     // Check whether resource is currently active on node
  32     pcmk_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
  33 
  34     // The expected result is what we think the resource's current state is
  35     if (running == NULL) {
  36         pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
  37 
  38     } else if (rsc->role == pcmk_role_promoted) {
  39         pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
  40     }
  41 }
  42 
  43 /*!
  44  * \internal
  45  * \brief Create any needed robes on a node for a list of resources
  46  *
  47  * \param[in,out] rscs  List of resources to create probes for
  48  * \param[in,out] node  Node to create probes on
  49  *
  50  * \return true if any probe was created, otherwise false
  51  */
  52 bool
  53 pcmk__probe_resource_list(GList *rscs, pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  54 {
  55     bool any_created = false;
  56 
  57     for (GList *iter = rscs; iter != NULL; iter = iter->next) {
  58         pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
  59 
  60         if (rsc->cmds->create_probe(rsc, node)) {
  61             any_created = true;
  62         }
  63     }
  64     return any_created;
  65 }
  66 
  67 /*!
  68  * \internal
  69  * \brief Order one resource's start after another's start-up probe
  70  *
  71  * \param[in,out] rsc1  Resource that might get start-up probe
  72  * \param[in]     rsc2  Resource that might be started
  73  */
  74 static void
  75 probe_then_start(pcmk_resource_t *rsc1, pcmk_resource_t *rsc2)
     /* [previous][next][first][last][top][bottom][index][help] */
  76 {
  77     if ((rsc1->allocated_to != NULL)
  78         && (g_hash_table_lookup(rsc1->known_on,
  79                                 rsc1->allocated_to->details->id) == NULL)) {
  80 
  81         pcmk__new_ordering(rsc1,
  82                            pcmk__op_key(rsc1->id, PCMK_ACTION_MONITOR, 0),
  83                            NULL,
  84                            rsc2, pcmk__op_key(rsc2->id, PCMK_ACTION_START, 0),
  85                            NULL,
  86                            pcmk__ar_ordered, rsc1->cluster);
  87     }
  88 }
  89 
  90 /*!
  91  * \internal
  92  * \brief Check whether a guest resource will stop
  93  *
  94  * \param[in] node  Guest node to check
  95  *
  96  * \return true if guest resource will likely stop, otherwise false
  97  */
  98 static bool
  99 guest_resource_will_stop(const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 100 {
 101     const pcmk_resource_t *guest_rsc = node->details->remote_rsc->container;
 102 
 103     /* Ideally, we'd check whether the guest has a required stop, but that
 104      * information doesn't exist yet, so approximate it ...
 105      */
 106     return node->details->remote_requires_reset
 107            || node->details->unclean
 108            || pcmk_is_set(guest_rsc->flags, pcmk_rsc_failed)
 109            || (guest_rsc->next_role == pcmk_role_stopped)
 110 
 111            // Guest is moving
 112            || ((guest_rsc->role > pcmk_role_stopped)
 113                && (guest_rsc->allocated_to != NULL)
 114                && (pcmk__find_node_in_list(guest_rsc->running_on,
 115                    guest_rsc->allocated_to->details->uname) == NULL));
 116 }
 117 
 118 /*!
 119  * \internal
 120  * \brief Create a probe action for a resource on a node
 121  *
 122  * \param[in,out] rsc   Resource to create probe for
 123  * \param[in,out] node  Node to create probe on
 124  *
 125  * \return Newly created probe action
 126  */
 127 static pcmk_action_t *
 128 probe_action(pcmk_resource_t *rsc, pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 129 {
 130     pcmk_action_t *probe = NULL;
 131     char *key = pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0);
 132 
 133     crm_debug("Scheduling probe of %s %s on %s",
 134               pcmk_role_text(rsc->role), rsc->id, pcmk__node_name(node));
 135 
 136     probe = custom_action(rsc, key, PCMK_ACTION_MONITOR, node, FALSE,
 137                           rsc->cluster);
 138     pcmk__clear_action_flags(probe, pcmk_action_optional);
 139 
 140     pcmk__order_vs_unfence(rsc, node, probe, pcmk__ar_ordered);
 141     add_expected_result(probe, rsc, node);
 142     return probe;
 143 }
 144 
 145 /*!
 146  * \internal
 147  * \brief Create probes for a resource on a node, if needed
 148  *
 149  * \brief Schedule any probes needed for a resource on a node
 150  *
 151  * \param[in,out] rsc   Resource to create probe for
 152  * \param[in,out] node  Node to create probe on
 153  *
 154  * \return true if any probe was created, otherwise false
 155  */
 156 bool
 157 pcmk__probe_rsc_on_node(pcmk_resource_t *rsc, pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 158 {
 159     uint32_t flags = pcmk__ar_ordered;
 160     pcmk_action_t *probe = NULL;
 161     pcmk_node_t *allowed = NULL;
 162     pcmk_resource_t *top = uber_parent(rsc);
 163     const char *reason = NULL;
 164 
 165     CRM_ASSERT((rsc != NULL) && (node != NULL));
 166 
 167     if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_probe_resources)) {
 168         reason = "start-up probes are disabled";
 169         goto no_probe;
 170     }
 171 
 172     if (pcmk__is_pacemaker_remote_node(node)) {
 173         const char *class = crm_element_value(rsc->xml, PCMK_XA_CLASS);
 174 
 175         if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
 176             reason = "Pacemaker Remote nodes cannot run stonith agents";
 177             goto no_probe;
 178 
 179         } else if (pcmk__is_guest_or_bundle_node(node)
 180                    && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
 181             reason = "guest nodes cannot run resources containing guest nodes";
 182             goto no_probe;
 183 
 184         } else if (rsc->is_remote_node) {
 185             reason = "Pacemaker Remote nodes cannot host remote connections";
 186             goto no_probe;
 187         }
 188     }
 189 
 190     // If this is a collective resource, probes are created for its children
 191     if (rsc->children != NULL) {
 192         return pcmk__probe_resource_list(rsc->children, node);
 193     }
 194 
 195     if ((rsc->container != NULL) && !rsc->is_remote_node) {
 196         reason = "resource is inside a container";
 197         goto no_probe;
 198 
 199     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
 200         reason = "resource is orphaned";
 201         goto no_probe;
 202 
 203     } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
 204         reason = "resource state is already known";
 205         goto no_probe;
 206     }
 207 
 208     allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
 209 
 210     if (rsc->exclusive_discover || top->exclusive_discover) {
 211         // Exclusive discovery is enabled ...
 212 
 213         if (allowed == NULL) {
 214             // ... but this node is not allowed to run the resource
 215             reason = "resource has exclusive discovery but is not allowed "
 216                      "on node";
 217             goto no_probe;
 218 
 219         } else if (allowed->rsc_discover_mode != pcmk_probe_exclusive) {
 220             // ... but no constraint marks this node for discovery of resource
 221             reason = "resource has exclusive discovery but is not enabled "
 222                      "on node";
 223             goto no_probe;
 224         }
 225     }
 226 
 227     if (allowed == NULL) {
 228         allowed = node;
 229     }
 230     if (allowed->rsc_discover_mode == pcmk_probe_never) {
 231         reason = "node has discovery disabled";
 232         goto no_probe;
 233     }
 234 
 235     if (pcmk__is_guest_or_bundle_node(node)) {
 236         pcmk_resource_t *guest = node->details->remote_rsc->container;
 237 
 238         if (guest->role == pcmk_role_stopped) {
 239             // The guest is stopped, so we know no resource is active there
 240             reason = "node's guest is stopped";
 241             probe_then_start(guest, top);
 242             goto no_probe;
 243 
 244         } else if (guest_resource_will_stop(node)) {
 245             reason = "node's guest will stop";
 246 
 247             // Order resource start after guest stop (in case it's restarting)
 248             pcmk__new_ordering(guest,
 249                                pcmk__op_key(guest->id, PCMK_ACTION_STOP, 0),
 250                                NULL, top,
 251                                pcmk__op_key(top->id, PCMK_ACTION_START, 0),
 252                                NULL, pcmk__ar_ordered, rsc->cluster);
 253             goto no_probe;
 254         }
 255     }
 256 
 257     // We've eliminated all cases where a probe is not needed, so now it is
 258     probe = probe_action(rsc, node);
 259 
 260     /* Below, we will order the probe relative to start or reload. If this is a
 261      * clone instance, the start or reload is for the entire clone rather than
 262      * just the instance. Otherwise, the start or reload is for the resource
 263      * itself.
 264      */
 265     if (!pcmk__is_clone(top)) {
 266         top = rsc;
 267     }
 268 
 269     /* Prevent a start if the resource can't be probed, but don't cause the
 270      * resource or entire clone to stop if already active.
 271      */
 272     if (!pcmk_is_set(probe->flags, pcmk_action_runnable)
 273         && (top->running_on == NULL)) {
 274         pcmk__set_relation_flags(flags, pcmk__ar_unrunnable_first_blocks);
 275     }
 276 
 277     // Start or reload after probing the resource
 278     pcmk__new_ordering(rsc, NULL, probe,
 279                        top, pcmk__op_key(top->id, PCMK_ACTION_START, 0), NULL,
 280                        flags, rsc->cluster);
 281     pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
 282                        pcmk__ar_ordered, rsc->cluster);
 283 
 284     return true;
 285 
 286 no_probe:
 287     pcmk__rsc_trace(rsc,
 288                     "Skipping probe for %s on %s because %s",
 289                     rsc->id, node->details->id, reason);
 290     return false;
 291 }
 292 
 293 /*!
 294  * \internal
 295  * \brief Check whether a probe should be ordered before another action
 296  *
 297  * \param[in] probe  Probe action to check
 298  * \param[in] then   Other action to check
 299  *
 300  * \return true if \p probe should be ordered before \p then, otherwise false
 301  */
 302 static bool
 303 probe_needed_before_action(const pcmk_action_t *probe,
     /* [previous][next][first][last][top][bottom][index][help] */
 304                            const pcmk_action_t *then)
 305 {
 306     // Probes on a node are performed after unfencing it, not before
 307     if (pcmk__str_eq(then->task, PCMK_ACTION_STONITH, pcmk__str_none)
 308         && pcmk__same_node(probe->node, then->node)) {
 309         const char *op = g_hash_table_lookup(then->meta,
 310                                              PCMK__META_STONITH_ACTION);
 311 
 312         if (pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) {
 313             return false;
 314         }
 315     }
 316 
 317     // Probes should be done on a node before shutting it down
 318     if (pcmk__str_eq(then->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)
 319         && (probe->node != NULL) && (then->node != NULL)
 320         && !pcmk__same_node(probe->node, then->node)) {
 321         return false;
 322     }
 323 
 324     // Otherwise probes should always be done before any other action
 325     return true;
 326 }
 327 
 328 /*!
 329  * \internal
 330  * \brief Add implicit "probe then X" orderings for "stop then X" orderings
 331  *
 332  * If the state of a resource is not known yet, a probe will be scheduled,
 333  * expecting a "not running" result. If the probe fails, a stop will not be
 334  * scheduled until the next transition. Thus, if there are ordering constraints
 335  * like "stop this resource then do something else that's not for the same
 336  * resource", add implicit "probe this resource then do something" equivalents
 337  * so the relation is upheld until we know whether a stop is needed.
 338  *
 339  * \param[in,out] scheduler  Scheduler data
 340  */
 341 static void
 342 add_probe_orderings_for_stops(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 343 {
 344     for (GList *iter = scheduler->ordering_constraints; iter != NULL;
 345          iter = iter->next) {
 346 
 347         pcmk__action_relation_t *order = iter->data;
 348         uint32_t order_flags = pcmk__ar_ordered;
 349         GList *probes = NULL;
 350         GList *then_actions = NULL;
 351         pcmk_action_t *first = NULL;
 352         pcmk_action_t *then = NULL;
 353 
 354         // Skip disabled orderings
 355         if (order->flags == pcmk__ar_none) {
 356             continue;
 357         }
 358 
 359         // Skip non-resource orderings, and orderings for the same resource
 360         if ((order->rsc1 == NULL) || (order->rsc1 == order->rsc2)) {
 361             continue;
 362         }
 363 
 364         // Skip invalid orderings (shouldn't be possible)
 365         first = order->action1;
 366         then = order->action2;
 367         if (((first == NULL) && (order->task1 == NULL))
 368             || ((then == NULL) && (order->task2 == NULL))) {
 369             continue;
 370         }
 371 
 372         // Skip orderings for first actions other than stop
 373         if ((first != NULL) && !pcmk__str_eq(first->task, PCMK_ACTION_STOP,
 374                                              pcmk__str_none)) {
 375             continue;
 376         } else if ((first == NULL)
 377                    && !pcmk__ends_with(order->task1,
 378                                        "_" PCMK_ACTION_STOP "_0")) {
 379             continue;
 380         }
 381 
 382         /* Do not imply a probe ordering for a resource inside of a stopping
 383          * container. Otherwise, it might introduce a transition loop, since a
 384          * probe could be scheduled after the container starts again.
 385          */
 386         if ((order->rsc2 != NULL) && (order->rsc1->container == order->rsc2)) {
 387 
 388             if ((then != NULL) && pcmk__str_eq(then->task, PCMK_ACTION_STOP,
 389                                                pcmk__str_none)) {
 390                 continue;
 391             } else if ((then == NULL)
 392                        && pcmk__ends_with(order->task2,
 393                                           "_" PCMK_ACTION_STOP "_0")) {
 394                 continue;
 395             }
 396         }
 397 
 398         // Preserve certain order options for future filtering
 399         if (pcmk_is_set(order->flags, pcmk__ar_if_first_unmigratable)) {
 400             pcmk__set_relation_flags(order_flags,
 401                                      pcmk__ar_if_first_unmigratable);
 402         }
 403         if (pcmk_is_set(order->flags, pcmk__ar_if_on_same_node)) {
 404             pcmk__set_relation_flags(order_flags, pcmk__ar_if_on_same_node);
 405         }
 406 
 407         // Preserve certain order types for future filtering
 408         if ((order->flags == pcmk__ar_if_required_on_same_node)
 409             || (order->flags == pcmk__ar_if_on_same_node_or_target)) {
 410             order_flags = order->flags;
 411         }
 412 
 413         // List all scheduled probes for the first resource
 414         probes = pe__resource_actions(order->rsc1, NULL, PCMK_ACTION_MONITOR,
 415                                       FALSE);
 416         if (probes == NULL) { // There aren't any
 417             continue;
 418         }
 419 
 420         // List all relevant "then" actions
 421         if (then != NULL) {
 422             then_actions = g_list_prepend(NULL, then);
 423 
 424         } else if (order->rsc2 != NULL) {
 425             then_actions = find_actions(order->rsc2->actions, order->task2,
 426                                         NULL);
 427             if (then_actions == NULL) { // There aren't any
 428                 g_list_free(probes);
 429                 continue;
 430             }
 431         }
 432 
 433         crm_trace("Implying 'probe then' orderings for '%s then %s' "
 434                   "(id=%d, type=%.6x)",
 435                   ((first == NULL)? order->task1 : first->uuid),
 436                   ((then == NULL)? order->task2 : then->uuid),
 437                   order->id, order->flags);
 438 
 439         for (GList *probe_iter = probes; probe_iter != NULL;
 440              probe_iter = probe_iter->next) {
 441 
 442             pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
 443 
 444             for (GList *then_iter = then_actions; then_iter != NULL;
 445                  then_iter = then_iter->next) {
 446 
 447                 pcmk_action_t *then = (pcmk_action_t *) then_iter->data;
 448 
 449                 if (probe_needed_before_action(probe, then)) {
 450                     order_actions(probe, then, order_flags);
 451                 }
 452             }
 453         }
 454 
 455         g_list_free(then_actions);
 456         g_list_free(probes);
 457     }
 458 }
 459 
 460 /*!
 461  * \internal
 462  * \brief Add necessary orderings between probe and starts of clone instances
 463  *
 464  * , in additon to the ordering with the parent resource added upon creating
 465  * the probe.
 466  *
 467  * \param[in,out] probe     Probe as 'first' action in an ordering
 468  * \param[in,out] after     'then' action wrapper in the ordering
 469  */
 470 static void
 471 add_start_orderings_for_probe(pcmk_action_t *probe,
     /* [previous][next][first][last][top][bottom][index][help] */
 472                               pcmk__related_action_t *after)
 473 {
 474     uint32_t flags = pcmk__ar_ordered|pcmk__ar_unrunnable_first_blocks;
 475 
 476     /* Although the ordering between the probe of the clone instance and the
 477      * start of its parent has been added in pcmk__probe_rsc_on_node(), we
 478      * avoided enforcing `pcmk__ar_unrunnable_first_blocks` order type for that
 479      * as long as any of the clone instances are running to prevent them from
 480      * being unexpectedly stopped.
 481      *
 482      * On the other hand, we still need to prevent any inactive instances from
 483      * starting unless the probe is runnable so that we don't risk starting too
 484      * many instances before we know the state on all nodes.
 485      */
 486     if ((after->action->rsc->variant <= pcmk_rsc_variant_group)
 487         || pcmk_is_set(probe->flags, pcmk_action_runnable)
 488         // The order type is already enforced for its parent.
 489         || pcmk_is_set(after->type, pcmk__ar_unrunnable_first_blocks)
 490         || (pe__const_top_resource(probe->rsc, false) != after->action->rsc)
 491         || !pcmk__str_eq(after->action->task, PCMK_ACTION_START,
 492                          pcmk__str_none)) {
 493         return;
 494     }
 495 
 496     crm_trace("Adding probe start orderings for 'unrunnable %s@%s "
 497               "then instances of %s@%s'",
 498               probe->uuid, pcmk__node_name(probe->node),
 499               after->action->uuid, pcmk__node_name(after->action->node));
 500 
 501     for (GList *then_iter = after->action->actions_after; then_iter != NULL;
 502          then_iter = then_iter->next) {
 503 
 504         pcmk__related_action_t *then = then_iter->data;
 505 
 506         if (then->action->rsc->running_on
 507             || (pe__const_top_resource(then->action->rsc, false)
 508                 != after->action->rsc)
 509             || !pcmk__str_eq(then->action->task, PCMK_ACTION_START,
 510                              pcmk__str_none)) {
 511             continue;
 512         }
 513 
 514         crm_trace("Adding probe start ordering for 'unrunnable %s@%s "
 515                   "then %s@%s' (type=%#.6x)",
 516                   probe->uuid, pcmk__node_name(probe->node),
 517                   then->action->uuid, pcmk__node_name(then->action->node),
 518                   flags);
 519 
 520         /* Prevent the instance from starting if the instance can't, but don't
 521          * cause any other intances to stop if already active.
 522          */
 523         order_actions(probe, then->action, flags);
 524     }
 525 
 526     return;
 527 }
 528 
 529 /*!
 530  * \internal
 531  * \brief Order probes before restarts and re-promotes
 532  *
 533  * If a given ordering is a "probe then start" or "probe then promote" ordering,
 534  * add an implicit "probe then stop/demote" ordering in case the action is part
 535  * of a restart/re-promote, and do the same recursively for all actions ordered
 536  * after the "then" action.
 537  *
 538  * \param[in,out] probe     Probe as 'first' action in an ordering
 539  * \param[in,out] after     'then' action in the ordering
 540  */
 541 static void
 542 add_restart_orderings_for_probe(pcmk_action_t *probe, pcmk_action_t *after)
     /* [previous][next][first][last][top][bottom][index][help] */
 543 {
 544     GList *iter = NULL;
 545     bool interleave = false;
 546     pcmk_resource_t *compatible_rsc = NULL;
 547 
 548     // Validate that this is a resource probe followed by some action
 549     if ((after == NULL) || (probe == NULL) || !pcmk__is_primitive(probe->rsc)
 550         || !pcmk__str_eq(probe->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
 551         return;
 552     }
 553 
 554     // Avoid running into any possible loop
 555     if (pcmk_is_set(after->flags, pcmk_action_detect_loop)) {
 556         return;
 557     }
 558     pcmk__set_action_flags(after, pcmk_action_detect_loop);
 559 
 560     crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
 561               probe->uuid, pcmk__node_name(probe->node),
 562               after->uuid, pcmk__node_name(after->node));
 563 
 564     /* Add restart orderings if "then" is for a different primitive.
 565      * Orderings for collective resources will be added later.
 566      */
 567     if (pcmk__is_primitive(after->rsc) && (probe->rsc != after->rsc)) {
 568 
 569             GList *then_actions = NULL;
 570 
 571             if (pcmk__str_eq(after->task, PCMK_ACTION_START, pcmk__str_none)) {
 572                 then_actions = pe__resource_actions(after->rsc, NULL,
 573                                                     PCMK_ACTION_STOP, FALSE);
 574 
 575             } else if (pcmk__str_eq(after->task, PCMK_ACTION_PROMOTE,
 576                                     pcmk__str_none)) {
 577                 then_actions = pe__resource_actions(after->rsc, NULL,
 578                                                     PCMK_ACTION_DEMOTE, FALSE);
 579             }
 580 
 581             for (iter = then_actions; iter != NULL; iter = iter->next) {
 582                 pcmk_action_t *then = (pcmk_action_t *) iter->data;
 583 
 584                 // Skip pseudo-actions (for example, those implied by fencing)
 585                 if (!pcmk_is_set(then->flags, pcmk_action_pseudo)) {
 586                     order_actions(probe, then, pcmk__ar_ordered);
 587                 }
 588             }
 589             g_list_free(then_actions);
 590     }
 591 
 592     /* Detect whether "then" is an interleaved clone action. For these, we want
 593      * to add orderings only for the relevant instance.
 594      */
 595     if ((after->rsc != NULL)
 596         && (after->rsc->variant > pcmk_rsc_variant_group)) {
 597         const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
 598                                                        PCMK_META_INTERLEAVE);
 599 
 600         interleave = crm_is_true(interleave_s);
 601         if (interleave) {
 602             compatible_rsc = pcmk__find_compatible_instance(probe->rsc,
 603                                                             after->rsc,
 604                                                             pcmk_role_unknown,
 605                                                             false);
 606         }
 607     }
 608 
 609     /* Now recursively do the same for all actions ordered after "then". This
 610      * also handles collective resources since the collective action will be
 611      * ordered before its individual instances' actions.
 612      */
 613     for (iter = after->actions_after; iter != NULL; iter = iter->next) {
 614         pcmk__related_action_t *after_wrapper = iter->data;
 615 
 616         /* pcmk__ar_first_implies_then is the reason why a required A.start
 617          * implies/enforces B.start to be required too, which is the cause of
 618          * B.restart/re-promote.
 619          *
 620          * Not sure about pcmk__ar_first_implies_same_node_then though. It's now
 621          * only used for unfencing case, which tends to introduce transition
 622          * loops...
 623          */
 624         if (!pcmk_is_set(after_wrapper->type, pcmk__ar_first_implies_then)) {
 625             /* The order type between a group/clone and its child such as
 626              * B.start-> B_child.start is:
 627              * pcmk__ar_then_implies_first_graphed
 628              * |pcmk__ar_unrunnable_first_blocks
 629              *
 630              * Proceed through the ordering chain and build dependencies with
 631              * its children.
 632              */
 633             if ((after->rsc == NULL)
 634                 || (after->rsc->variant < pcmk_rsc_variant_group)
 635                 || (probe->rsc->parent == after->rsc)
 636                 || (after_wrapper->action->rsc == NULL)
 637                 || (after_wrapper->action->rsc->variant > pcmk_rsc_variant_group)
 638                 || (after->rsc != after_wrapper->action->rsc->parent)) {
 639                 continue;
 640             }
 641 
 642             /* Proceed to the children of a group or a non-interleaved clone.
 643              * For an interleaved clone, proceed only to the relevant child.
 644              */
 645             if ((after->rsc->variant > pcmk_rsc_variant_group) && interleave
 646                 && ((compatible_rsc == NULL)
 647                     || (compatible_rsc != after_wrapper->action->rsc))) {
 648                 continue;
 649             }
 650         }
 651 
 652         crm_trace("Recursively adding probe restart orderings for "
 653                   "'%s@%s then %s@%s' (type=%#.6x)",
 654                   after->uuid, pcmk__node_name(after->node),
 655                   after_wrapper->action->uuid,
 656                   pcmk__node_name(after_wrapper->action->node),
 657                   after_wrapper->type);
 658 
 659         add_restart_orderings_for_probe(probe, after_wrapper->action);
 660     }
 661 }
 662 
 663 /*!
 664  * \internal
 665  * \brief Clear the tracking flag on all scheduled actions
 666  *
 667  * \param[in,out] scheduler  Scheduler data
 668  */
 669 static void
 670 clear_actions_tracking_flag(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 671 {
 672     for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
 673         pcmk_action_t *action = iter->data;
 674 
 675         pcmk__clear_action_flags(action, pcmk_action_detect_loop);
 676     }
 677 }
 678 
 679 /*!
 680  * \internal
 681  * \brief Add start and restart orderings for probes scheduled for a resource
 682  *
 683  * \param[in,out] data       Resource whose probes should be ordered
 684  * \param[in]     user_data  Unused
 685  */
 686 static void
 687 add_start_restart_orderings_for_rsc(gpointer data, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 688 {
 689     pcmk_resource_t *rsc = data;
 690     GList *probes = NULL;
 691 
 692     // For collective resources, order each instance recursively
 693     if (!pcmk__is_primitive(rsc)) {
 694         g_list_foreach(rsc->children, add_start_restart_orderings_for_rsc,
 695                        NULL);
 696         return;
 697     }
 698 
 699     // Find all probes for given resource
 700     probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
 701 
 702     // Add probe restart orderings for each probe found
 703     for (GList *iter = probes; iter != NULL; iter = iter->next) {
 704         pcmk_action_t *probe = (pcmk_action_t *) iter->data;
 705 
 706         for (GList *then_iter = probe->actions_after; then_iter != NULL;
 707              then_iter = then_iter->next) {
 708 
 709             pcmk__related_action_t *then = then_iter->data;
 710 
 711             add_start_orderings_for_probe(probe, then);
 712             add_restart_orderings_for_probe(probe, then->action);
 713             clear_actions_tracking_flag(rsc->cluster);
 714         }
 715     }
 716 
 717     g_list_free(probes);
 718 }
 719 
 720 /*!
 721  * \internal
 722  * \brief Add "A then probe B" orderings for "A then B" orderings
 723  *
 724  * \param[in,out] scheduler  Scheduler data
 725  *
 726  * \note This function is currently disabled (see next comment).
 727  */
 728 static void
 729 order_then_probes(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 730 {
 731 #if 0
 732     /* Given an ordering "A then B", we would prefer to wait for A to be started
 733      * before probing B.
 734      *
 735      * For example, if A is a filesystem which B can't even run without, it
 736      * would be helpful if the author of B's agent could assume that A is
 737      * running before B.monitor will be called.
 738      *
 739      * However, we can't _only_ probe after A is running, otherwise we wouldn't
 740      * detect the state of B if A could not be started. We can't even do an
 741      * opportunistic version of this, because B may be moving:
 742      *
 743      *   A.stop -> A.start -> B.probe -> B.stop -> B.start
 744      *
 745      * and if we add B.stop -> A.stop here, we get a loop:
 746      *
 747      *   A.stop -> A.start -> B.probe -> B.stop -> A.stop
 748      *
 749      * We could kill the "B.probe -> B.stop" dependency, but that could mean
 750      * stopping B "too" soon, because B.start must wait for the probe, and
 751      * we don't want to stop B if we can't start it.
 752      *
 753      * We could add the ordering only if A is an anonymous clone with
 754      * clone-max == node-max (since we'll never be moving it). However, we could
 755      * still be stopping one instance at the same time as starting another.
 756      *
 757      * The complexity of checking for allowed conditions combined with the ever
 758      * narrowing use case suggests that this code should remain disabled until
 759      * someone gets smarter.
 760      */
 761     for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) {
 762         pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
 763 
 764         pcmk_action_t *start = NULL;
 765         GList *actions = NULL;
 766         GList *probes = NULL;
 767 
 768         actions = pe__resource_actions(rsc, NULL, PCMK_ACTION_START, FALSE);
 769 
 770         if (actions) {
 771             start = actions->data;
 772             g_list_free(actions);
 773         }
 774 
 775         if (start == NULL) {
 776             crm_debug("No start action for %s", rsc->id);
 777             continue;
 778         }
 779 
 780         probes = pe__resource_actions(rsc, NULL, PCMK_ACTION_MONITOR, FALSE);
 781 
 782         for (actions = start->actions_before; actions != NULL;
 783              actions = actions->next) {
 784 
 785             pcmk__related_action_t *before = actions->data;
 786 
 787             pcmk_action_t *first = before->action;
 788             pcmk_resource_t *first_rsc = first->rsc;
 789 
 790             if (first->required_runnable_before) {
 791                 for (GList *clone_actions = first->actions_before;
 792                      clone_actions != NULL;
 793                      clone_actions = clone_actions->next) {
 794 
 795                     before = clone_actions->data;
 796 
 797                     crm_trace("Testing '%s then %s' for %s",
 798                               first->uuid, before->action->uuid, start->uuid);
 799 
 800                     CRM_ASSERT(before->action->rsc != NULL);
 801                     first_rsc = before->action->rsc;
 802                     break;
 803                 }
 804 
 805             } else if (!pcmk__str_eq(first->task, PCMK_ACTION_START,
 806                                      pcmk__str_none)) {
 807                 crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
 808             }
 809 
 810             if (first_rsc == NULL) {
 811                 continue;
 812 
 813             } else if (pe__const_top_resource(first_rsc, false)
 814                        == pe__const_top_resource(start->rsc, false)) {
 815                 crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
 816                 continue;
 817 
 818             } else if (!pcmk__is_clone(pe__const_top_resource(first_rsc,
 819                                                               false))) {
 820                 crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
 821                 continue;
 822             }
 823 
 824             crm_debug("Applying %s before %s %d", first->uuid, start->uuid,
 825                       pe__const_top_resource(first_rsc, false)->variant);
 826 
 827             for (GList *probe_iter = probes; probe_iter != NULL;
 828                  probe_iter = probe_iter->next) {
 829 
 830                 pcmk_action_t *probe = (pcmk_action_t *) probe_iter->data;
 831 
 832                 crm_debug("Ordering %s before %s", first->uuid, probe->uuid);
 833                 order_actions(first, probe, pcmk__ar_ordered);
 834             }
 835         }
 836     }
 837 #endif
 838 }
 839 
 840 void
 841 pcmk__order_probes(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 842 {
 843     // Add orderings for "probe then X"
 844     g_list_foreach(scheduler->resources, add_start_restart_orderings_for_rsc,
 845                    NULL);
 846     add_probe_orderings_for_stops(scheduler);
 847 
 848     order_then_probes(scheduler);
 849 }
 850 
 851 /*!
 852  * \internal
 853  * \brief Schedule any probes needed
 854  *
 855  * \param[in,out] scheduler  Scheduler data
 856  *
 857  * \note This may also schedule fencing of failed remote nodes.
 858  */
 859 void
 860 pcmk__schedule_probes(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 861 {
 862     // Schedule probes on each node in the cluster as needed
 863     for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
 864         pcmk_node_t *node = (pcmk_node_t *) iter->data;
 865         const char *probed = NULL;
 866 
 867         if (!node->details->online) { // Don't probe offline nodes
 868             if (pcmk__is_failed_remote_node(node)) {
 869                 pe_fence_node(scheduler, node,
 870                               "the connection is unrecoverable", FALSE);
 871             }
 872             continue;
 873 
 874         } else if (node->details->unclean) { // ... or nodes that need fencing
 875             continue;
 876 
 877         } else if (!node->details->rsc_discovery_enabled) {
 878             // The user requested that probes not be done on this node
 879             continue;
 880         }
 881 
 882         /* This is no longer needed for live clusters, since the probe_complete
 883          * node attribute will never be in the CIB. However this is still useful
 884          * for processing old saved CIBs (< 1.1.14), including the
 885          * reprobe-target_rc regression test.
 886          */
 887         probed = pcmk__node_attr(node, CRM_OP_PROBED, NULL,
 888                                  pcmk__rsc_node_current);
 889         if (probed != NULL && crm_is_true(probed) == FALSE) {
 890             pcmk_action_t *probe_op = NULL;
 891 
 892             probe_op = custom_action(NULL,
 893                                      crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
 894                                                        node->details->uname),
 895                                      CRM_OP_REPROBE, node, FALSE, scheduler);
 896             pcmk__insert_meta(probe_op, PCMK__META_OP_NO_WAIT, PCMK_VALUE_TRUE);
 897             continue;
 898         }
 899 
 900         // Probe each resource in the cluster on this node, as needed
 901         pcmk__probe_resource_list(scheduler->resources, node);
 902     }
 903 }

/* [previous][next][first][last][top][bottom][index][help] */