root/lib/pacemaker/pcmk_sched_probes.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. add_expected_result
  2. pcmk__probe_resource_list
  3. probe_then_start
  4. guest_resource_will_stop
  5. probe_action
  6. pcmk__probe_rsc_on_node
  7. probe_needed_before_action
  8. add_probe_orderings_for_stops
  9. add_restart_orderings_for_probe
  10. clear_actions_tracking_flag
  11. add_restart_orderings_for_rsc
  12. order_then_probes
  13. pcmk__order_probes
  14. pcmk__schedule_probes

   1 /*
   2  * Copyright 2004-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 #include <crm/crm.h>
  15 #include <crm/pengine/status.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Add the expected result to a newly created probe
  22  *
  23  * \param[in] probe  Probe action to add expected result to
  24  * \param[in] rsc    Resource that probe is for
  25  * \param[in] node   Node that probe will run on
  26  */
  27 static void
  28 add_expected_result(pe_action_t *probe, pe_resource_t *rsc, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  29 {
  30     // Check whether resource is currently active on node
  31     pe_node_t *running = pe_find_node_id(rsc->running_on, node->details->id);
  32 
  33     // The expected result is what we think the resource's current state is
  34     if (running == NULL) {
  35         pe__add_action_expected_result(probe, CRM_EX_NOT_RUNNING);
  36 
  37     } else if (rsc->role == RSC_ROLE_PROMOTED) {
  38         pe__add_action_expected_result(probe, CRM_EX_PROMOTED);
  39     }
  40 }
  41 
  42 /*!
  43  * \internal
  44  * \brief Create any needed robes on a node for a list of resources
  45  *
  46  * \param[in] rscs  List of resources to create probes for
  47  * \param[in] node  Node to create probes on
  48  *
  49  * \return true if any probe was created, otherwise false
  50  */
  51 bool
  52 pcmk__probe_resource_list(GList *rscs, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54     bool any_created = false;
  55 
  56     for (GList *iter = rscs; iter != NULL; iter = iter->next) {
  57         pe_resource_t *rsc = (pe_resource_t *) iter->data;
  58 
  59         if (rsc->cmds->create_probe(rsc, node)) {
  60             any_created = true;
  61         }
  62     }
  63     return any_created;
  64 }
  65 
  66 /*!
  67  * \internal
  68  * \brief Order one resource's start after another's start-up probe
  69  *
  70  * \param[in] rsc1  Resource that might get start-up probe
  71  * \param[in] rsc2  Resource that might be started
  72  */
  73 static void
  74 probe_then_start(pe_resource_t *rsc1, pe_resource_t *rsc2)
     /* [previous][next][first][last][top][bottom][index][help] */
  75 {
  76     if ((rsc1->allocated_to != NULL)
  77         && (g_hash_table_lookup(rsc1->known_on,
  78                                 rsc1->allocated_to->details->id) == NULL)) {
  79 
  80         pcmk__new_ordering(rsc1, pcmk__op_key(rsc1->id, RSC_STATUS, 0), NULL,
  81                            rsc2, pcmk__op_key(rsc2->id, RSC_START, 0), NULL,
  82                            pe_order_optional, rsc1->cluster);
  83     }
  84 }
  85 
  86 /*!
  87  * \internal
  88  * \brief Check whether a guest resource will stop
  89  *
  90  * \param[in] node  Guest node to check
  91  *
  92  * \return true if guest resource will likely stop, otherwise false
  93  */
  94 static bool
  95 guest_resource_will_stop(pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  96 {
  97     pe_resource_t *guest_rsc = node->details->remote_rsc->container;
  98 
  99     /* Ideally, we'd check whether the guest has a required stop, but that
 100      * information doesn't exist yet, so approximate it ...
 101      */
 102     return node->details->remote_requires_reset
 103            || node->details->unclean
 104            || pcmk_is_set(guest_rsc->flags, pe_rsc_failed)
 105            || (guest_rsc->next_role == RSC_ROLE_STOPPED)
 106 
 107            // Guest is moving
 108            || ((guest_rsc->role > RSC_ROLE_STOPPED)
 109                && (guest_rsc->allocated_to != NULL)
 110                && (pe_find_node(guest_rsc->running_on,
 111                    guest_rsc->allocated_to->details->uname) == NULL));
 112 }
 113 
 114 /*!
 115  * \internal
 116  * \brief Create a probe action for a resource on a node
 117  *
 118  * \param[in] rsc   Resource to create probe for
 119  * \param[in[ node  Node to create probe on
 120  *
 121  * \return Newly created probe action
 122  */
 123 static pe_action_t *
 124 probe_action(pe_resource_t *rsc, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 125 {
 126     pe_action_t *probe = NULL;
 127     char *key = pcmk__op_key(rsc->id, RSC_STATUS, 0);
 128 
 129     crm_debug("Scheduling probe of %s %s on %s",
 130               role2text(rsc->role), rsc->id, pe__node_name(node));
 131 
 132     probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE,
 133                           rsc->cluster);
 134     pe__clear_action_flags(probe, pe_action_optional);
 135 
 136     pcmk__order_vs_unfence(rsc, node, probe, pe_order_optional);
 137     add_expected_result(probe, rsc, node);
 138     return probe;
 139 }
 140 
 141 /*!
 142  * \internal
 143  * \brief Create probes for a resource on a node, if needed
 144  *
 145  * \brief Schedule any probes needed for a resource on a node
 146  *
 147  * \param[in] rsc   Resource to create probe for
 148  * \param[in] node  Node to create probe on
 149  *
 150  * \return true if any probe was created, otherwise false
 151  */
 152 bool
 153 pcmk__probe_rsc_on_node(pe_resource_t *rsc, pe_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155     uint32_t flags = pe_order_optional;
 156     pe_action_t *probe = NULL;
 157     pe_node_t *allowed = NULL;
 158     pe_resource_t *top = uber_parent(rsc);
 159     const char *reason = NULL;
 160 
 161     CRM_CHECK((rsc != NULL) && (node != NULL), return false);
 162 
 163     if (!pcmk_is_set(rsc->cluster->flags, pe_flag_startup_probes)) {
 164         reason = "start-up probes are disabled";
 165         goto no_probe;
 166     }
 167 
 168     if (pe__is_guest_or_remote_node(node)) {
 169         const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
 170 
 171         if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) {
 172             reason = "Pacemaker Remote nodes cannot run stonith agents";
 173             goto no_probe;
 174 
 175         } else if (pe__is_guest_node(node)
 176                    && pe__resource_contains_guest_node(rsc->cluster, rsc)) {
 177             reason = "guest nodes cannot run resources containing guest nodes";
 178             goto no_probe;
 179 
 180         } else if (rsc->is_remote_node) {
 181             reason = "Pacemaker Remote nodes cannot host remote connections";
 182             goto no_probe;
 183         }
 184     }
 185 
 186     // If this is a collective resource, probes are created for its children
 187     if (rsc->children != NULL) {
 188         return pcmk__probe_resource_list(rsc->children, node);
 189     }
 190 
 191     if ((rsc->container != NULL) && !rsc->is_remote_node) {
 192         reason = "resource is inside a container";
 193         goto no_probe;
 194 
 195     } else if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
 196         reason = "resource is orphaned";
 197         goto no_probe;
 198 
 199     } else if (g_hash_table_lookup(rsc->known_on, node->details->id) != NULL) {
 200         reason = "resource state is already known";
 201         goto no_probe;
 202     }
 203 
 204     allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
 205 
 206     if (rsc->exclusive_discover || top->exclusive_discover) {
 207         // Exclusive discovery is enabled ...
 208 
 209         if (allowed == NULL) {
 210             // ... but this node is not allowed to run the resource
 211             reason = "resource has exclusive discovery but is not allowed "
 212                      "on node";
 213             goto no_probe;
 214 
 215         } else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
 216             // ... but no constraint marks this node for discovery of resource
 217             reason = "resource has exclusive discovery but is not enabled "
 218                      "on node";
 219             goto no_probe;
 220         }
 221     }
 222 
 223     if (allowed == NULL) {
 224         allowed = node;
 225     }
 226     if (allowed->rsc_discover_mode == pe_discover_never) {
 227         reason = "node has discovery disabled";
 228         goto no_probe;
 229     }
 230 
 231     if (pe__is_guest_node(node)) {
 232         pe_resource_t *guest = node->details->remote_rsc->container;
 233 
 234         if (guest->role == RSC_ROLE_STOPPED) {
 235             // The guest is stopped, so we know no resource is active there
 236             reason = "node's guest is stopped";
 237             probe_then_start(guest, top);
 238             goto no_probe;
 239 
 240         } else if (guest_resource_will_stop(node)) {
 241             reason = "node's guest will stop";
 242 
 243             // Order resource start after guest stop (in case it's restarting)
 244             pcmk__new_ordering(guest, pcmk__op_key(guest->id, RSC_STOP, 0),
 245                                NULL, top, pcmk__op_key(top->id, RSC_START, 0),
 246                                NULL, pe_order_optional, rsc->cluster);
 247             goto no_probe;
 248         }
 249     }
 250 
 251     // We've eliminated all cases where a probe is not needed, so now it is
 252     probe = probe_action(rsc, node);
 253 
 254     /* Order the probe relative to the parent -- or the resource itself if
 255      * cloned or a fence device when unfencing is used.
 256      */
 257     if ((pcmk_is_set(rsc->flags, pe_rsc_fence_device)
 258          && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing))
 259         || !pe_rsc_is_clone(top)) {
 260         top = rsc;
 261     }
 262 
 263     if (!pcmk_is_set(probe->flags, pe_action_runnable)
 264         && (rsc->running_on == NULL)) {
 265         /* Prevent the parent from starting if the resource can't, but don't
 266          * cause the parent to stop if already active.
 267          */
 268         pe__set_order_flags(flags, pe_order_runnable_left);
 269     }
 270 
 271     // Start or reload the parent after probing the resource
 272     pcmk__new_ordering(rsc, NULL, probe,
 273                        top, pcmk__op_key(top->id, RSC_START, 0), NULL,
 274                        flags, rsc->cluster);
 275     pcmk__new_ordering(rsc, NULL, probe, top, reload_key(rsc), NULL,
 276                        pe_order_optional, rsc->cluster);
 277 
 278     return true;
 279 
 280 no_probe:
 281     pe_rsc_trace(rsc,
 282                  "Skipping probe for %s on %s because %s",
 283                  rsc->id, node->details->id, reason);
 284     return false;
 285 }
 286 
 287 /*!
 288  * \internal
 289  * \brief Check whether a probe should be ordered before another action
 290  *
 291  * \param[in] probe  Probe action to check
 292  * \param[in] then   Other action to check
 293  *
 294  * \return true if \p probe should be ordered before \p then, otherwise false
 295  */
 296 static bool
 297 probe_needed_before_action(pe_action_t *probe, pe_action_t *then)
     /* [previous][next][first][last][top][bottom][index][help] */
 298 {
 299     // Probes on a node are performed after unfencing it, not before
 300     if (pcmk__str_eq(then->task, CRM_OP_FENCE, pcmk__str_casei)
 301          && (probe->node != NULL) && (then->node != NULL)
 302          && (probe->node->details == then->node->details)) {
 303         const char *op = g_hash_table_lookup(then->meta, "stonith_action");
 304 
 305         if (pcmk__str_eq(op, "on", pcmk__str_casei)) {
 306             return false;
 307         }
 308     }
 309 
 310     // Probes should be done on a node before shutting it down
 311     if (pcmk__str_eq(then->task, CRM_OP_SHUTDOWN, pcmk__str_none)
 312         && (probe->node != NULL) && (then->node != NULL)
 313         && (probe->node->details != then->node->details)) {
 314         return false;
 315     }
 316 
 317     // Otherwise probes should always be done before any other action
 318     return true;
 319 }
 320 
 321 /*!
 322  * \internal
 323  * \brief Add implicit "probe then X" orderings for "stop then X" orderings
 324  *
 325  * If the state of a resource is not known yet, a probe will be scheduled,
 326  * expecting a "not running" result. If the probe fails, a stop will not be
 327  * scheduled until the next transition. Thus, if there are ordering constraints
 328  * like "stop this resource then do something else that's not for the same
 329  * resource", add implicit "probe this resource then do something" equivalents
 330  * so the relation is upheld until we know whether a stop is needed.
 331  *
 332  * \param[in] data_set  Cluster working set
 333  */
 334 static void
 335 add_probe_orderings_for_stops(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 336 {
 337     for (GList *iter = data_set->ordering_constraints; iter != NULL;
 338          iter = iter->next) {
 339 
 340         pe__ordering_t *order = iter->data;
 341         uint32_t order_flags = pe_order_optional;
 342         GList *probes = NULL;
 343         GList *then_actions = NULL;
 344 
 345         // Skip disabled orderings
 346         if (order->flags == pe_order_none) {
 347             continue;
 348         }
 349 
 350         // Skip non-resource orderings, and orderings for the same resource
 351         if ((order->lh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) {
 352             continue;
 353         }
 354 
 355         // Skip invalid orderings (shouldn't be possible)
 356         if (((order->lh_action == NULL) && (order->lh_action_task == NULL)) ||
 357             ((order->rh_action == NULL) && (order->rh_action_task == NULL))) {
 358             continue;
 359         }
 360 
 361         // Skip orderings for first actions other than stop
 362         if ((order->lh_action != NULL)
 363             && !pcmk__str_eq(order->lh_action->task, RSC_STOP, pcmk__str_none)) {
 364             continue;
 365         } else if ((order->lh_action == NULL)
 366                    && !pcmk__ends_with(order->lh_action_task, "_" RSC_STOP "_0")) {
 367             continue;
 368         }
 369 
 370         /* Do not imply a probe ordering for a resource inside of a stopping
 371          * container. Otherwise, it might introduce a transition loop, since a
 372          * probe could be scheduled after the container starts again.
 373          */
 374         if ((order->rh_rsc != NULL)
 375             && (order->lh_rsc->container == order->rh_rsc)) {
 376 
 377             if ((order->rh_action != NULL)
 378                 && pcmk__str_eq(order->rh_action->task, RSC_STOP,
 379                                 pcmk__str_none)) {
 380                 continue;
 381             } else if ((order->rh_action == NULL)
 382                        && pcmk__ends_with(order->rh_action_task,
 383                                           "_" RSC_STOP "_0")) {
 384                 continue;
 385             }
 386         }
 387 
 388         // Preserve certain order options for future filtering
 389         if (pcmk_is_set(order->flags, pe_order_apply_first_non_migratable)) {
 390             pe__set_order_flags(order_flags,
 391                                 pe_order_apply_first_non_migratable);
 392         }
 393         if (pcmk_is_set(order->flags, pe_order_same_node)) {
 394             pe__set_order_flags(order_flags, pe_order_same_node);
 395         }
 396 
 397         // Preserve certain order types for future filtering
 398         if ((order->flags == pe_order_anti_colocation)
 399             || (order->flags == pe_order_load)) {
 400             order_flags = order->flags;
 401         }
 402 
 403         // List all scheduled probes for the first resource
 404         probes = pe__resource_actions(order->lh_rsc, NULL, RSC_STATUS, FALSE);
 405         if (probes == NULL) { // There aren't any
 406             continue;
 407         }
 408 
 409         // List all relevant "then" actions
 410         if (order->rh_action != NULL) {
 411             then_actions = g_list_prepend(NULL, order->rh_action);
 412 
 413         } else if (order->rh_rsc != NULL) {
 414             then_actions = find_actions(order->rh_rsc->actions,
 415                                         order->rh_action_task, NULL);
 416             if (then_actions == NULL) { // There aren't any
 417                 g_list_free(probes);
 418                 continue;
 419             }
 420         }
 421 
 422         crm_trace("Implying 'probe then' orderings for '%s then %s' "
 423                   "(id=%d, type=%.6x)",
 424                   order->lh_action? order->lh_action->uuid : order->lh_action_task,
 425                   order->rh_action? order->rh_action->uuid : order->rh_action_task,
 426                   order->id, order->flags);
 427 
 428         for (GList *probe_iter = probes; probe_iter != NULL;
 429              probe_iter = probe_iter->next) {
 430 
 431             pe_action_t *probe = (pe_action_t *) probe_iter->data;
 432 
 433             for (GList *then_iter = then_actions; then_iter != NULL;
 434                  then_iter = then_iter->next) {
 435 
 436                 pe_action_t *then = (pe_action_t *) then_iter->data;
 437 
 438                 if (probe_needed_before_action(probe, then)) {
 439                     order_actions(probe, then, order_flags);
 440                 }
 441             }
 442         }
 443 
 444         g_list_free(then_actions);
 445         g_list_free(probes);
 446     }
 447 }
 448 
 449 /*!
 450  * \internal
 451  * \brief Order probes before restarts and re-promotes
 452  *
 453  * If a given ordering is a "probe then start" or "probe then promote" ordering,
 454  * add an implicit "probe then stop/demote" ordering in case the action is part
 455  * of a restart/re-promote, and do the same recursively for all actions ordered
 456  * after the "then" action.
 457  *
 458  * \param[in] probe     Probe as 'first' action in an ordering
 459  * \param[in] after     'then' action in the ordering
 460  * \param[in] data_set  Cluster working set
 461  */
 462 static void
 463 add_restart_orderings_for_probe(pe_action_t *probe, pe_action_t *after,
     /* [previous][next][first][last][top][bottom][index][help] */
 464                                 pe_working_set_t *data_set)
 465 {
 466     GList *iter = NULL;
 467     bool interleave = false;
 468     pe_resource_t *compatible_rsc = NULL;
 469 
 470     // Validate that this is a resource probe followed by some action
 471     if ((after == NULL) || (probe == NULL) || (probe->rsc == NULL)
 472         || (probe->rsc->variant != pe_native)
 473         || !pcmk__str_eq(probe->task, RSC_STATUS, pcmk__str_casei)) {
 474         return;
 475     }
 476 
 477     // Avoid running into any possible loop
 478     if (pcmk_is_set(after->flags, pe_action_tracking)) {
 479         return;
 480     }
 481     pe__set_action_flags(after, pe_action_tracking);
 482 
 483     crm_trace("Adding probe restart orderings for '%s@%s then %s@%s'",
 484               probe->uuid, pe__node_name(probe->node),
 485               after->uuid, pe__node_name(after->node));
 486 
 487     /* Add restart orderings if "then" is for a different primitive.
 488      * Orderings for collective resources will be added later.
 489      */
 490     if ((after->rsc != NULL) && (after->rsc->variant == pe_native)
 491         && (probe->rsc != after->rsc)) {
 492 
 493             GList *then_actions = NULL;
 494 
 495             if (pcmk__str_eq(after->task, RSC_START, pcmk__str_casei)) {
 496                 then_actions = pe__resource_actions(after->rsc, NULL, RSC_STOP,
 497                                                     FALSE);
 498 
 499             } else if (pcmk__str_eq(after->task, RSC_PROMOTE, pcmk__str_casei)) {
 500                 then_actions = pe__resource_actions(after->rsc, NULL,
 501                                                     RSC_DEMOTE, FALSE);
 502             }
 503 
 504             for (iter = then_actions; iter != NULL; iter = iter->next) {
 505                 pe_action_t *then = (pe_action_t *) iter->data;
 506 
 507                 // Skip pseudo-actions (for example, those implied by fencing)
 508                 if (!pcmk_is_set(then->flags, pe_action_pseudo)) {
 509                     order_actions(probe, then, pe_order_optional);
 510                 }
 511             }
 512             g_list_free(then_actions);
 513     }
 514 
 515     /* Detect whether "then" is an interleaved clone action. For these, we want
 516      * to add orderings only for the relevant instance.
 517      */
 518     if ((after->rsc != NULL)
 519         && (after->rsc->variant > pe_group)) {
 520         const char *interleave_s = g_hash_table_lookup(after->rsc->meta,
 521                                                        XML_RSC_ATTR_INTERLEAVE);
 522 
 523         interleave = crm_is_true(interleave_s);
 524         if (interleave) {
 525             compatible_rsc = find_compatible_child(probe->rsc,
 526                                                    after->rsc,
 527                                                    RSC_ROLE_UNKNOWN,
 528                                                    FALSE);
 529         }
 530     }
 531 
 532     /* Now recursively do the same for all actions ordered after "then". This
 533      * also handles collective resources since the collective action will be
 534      * ordered before its individual instances' actions.
 535      */
 536     for (iter = after->actions_after; iter != NULL; iter = iter->next) {
 537         pe_action_wrapper_t *after_wrapper = (pe_action_wrapper_t *) iter->data;
 538 
 539         /* pe_order_implies_then is the reason why a required A.start
 540          * implies/enforces B.start to be required too, which is the cause of
 541          * B.restart/re-promote.
 542          *
 543          * Not sure about pe_order_implies_then_on_node though. It's now only
 544          * used for unfencing case, which tends to introduce transition
 545          * loops...
 546          */
 547         if (!pcmk_is_set(after_wrapper->type, pe_order_implies_then)) {
 548             /* The order type between a group/clone and its child such as
 549              * B.start-> B_child.start is:
 550              * pe_order_implies_first_printed | pe_order_runnable_left
 551              *
 552              * Proceed through the ordering chain and build dependencies with
 553              * its children.
 554              */
 555             if ((after->rsc == NULL)
 556                 || (after->rsc->variant < pe_group)
 557                 || (probe->rsc->parent == after->rsc)
 558                 || (after_wrapper->action->rsc == NULL)
 559                 || (after_wrapper->action->rsc->variant > pe_group)
 560                 || (after->rsc != after_wrapper->action->rsc->parent)) {
 561                 continue;
 562             }
 563 
 564             /* Proceed to the children of a group or a non-interleaved clone.
 565              * For an interleaved clone, proceed only to the relevant child.
 566              */
 567             if ((after->rsc->variant > pe_group) && interleave
 568                 && ((compatible_rsc == NULL)
 569                     || (compatible_rsc != after_wrapper->action->rsc))) {
 570                 continue;
 571             }
 572         }
 573 
 574         crm_trace("Recursively adding probe restart orderings for "
 575                   "'%s@%s then %s@%s' (type=%#.6x)",
 576                   after->uuid, pe__node_name(after->node),
 577                   after_wrapper->action->uuid,
 578                   pe__node_name(after_wrapper->action->node),
 579                   after_wrapper->type);
 580 
 581         add_restart_orderings_for_probe(probe, after_wrapper->action, data_set);
 582     }
 583 }
 584 
 585 /*!
 586  * \internal
 587  * \brief Clear the tracking flag on all scheduled actions
 588  *
 589  * \param[in] data_set  Cluster working set
 590  */
 591 static void
 592 clear_actions_tracking_flag(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 593 {
 594     GList *gIter = NULL;
 595 
 596     for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
 597         pe_action_t *action = (pe_action_t *) gIter->data;
 598 
 599         pe__clear_action_flags(action, pe_action_tracking);
 600     }
 601 }
 602 
 603 /*!
 604  * \internal
 605  * \brief Add restart orderings for any scheduled probes for a given resource
 606  *
 607  * \param[in] rsc       Resource whose probes should be ordered
 608  * \param[in] data_set  Cluster working set
 609  */
 610 static void
 611 add_restart_orderings_for_rsc(pe_resource_t *rsc, pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 612 {
 613     GList *probes = NULL;
 614 
 615     // For collective resources, order each instance recursively
 616     if (rsc->variant != pe_native) {
 617         g_list_foreach(rsc->children, (GFunc) add_restart_orderings_for_rsc,
 618                        data_set);
 619         return;
 620     }
 621 
 622     // Find all probes for given resource
 623     probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);
 624 
 625     // Add probe restart orderings for each probe found
 626     for (GList *iter = probes; iter != NULL; iter = iter->next) {
 627         pe_action_t *probe = (pe_action_t *) iter->data;
 628 
 629         for (GList *then_iter = probe->actions_after; then_iter != NULL;
 630              then_iter = then_iter->next) {
 631 
 632             pe_action_wrapper_t *then = (pe_action_wrapper_t *) then_iter->data;
 633 
 634             add_restart_orderings_for_probe(probe, then->action, data_set);
 635             clear_actions_tracking_flag(data_set);
 636         }
 637     }
 638 
 639     g_list_free(probes);
 640 }
 641 
/*!
 * \internal
 * \brief Add "A then probe B" orderings for "A then B" orderings
 *
 * \param[in] data_set  Cluster working set
 *
 * \note This function is currently disabled: its entire body is compiled out
 *       with "#if 0", so calling it has no effect. The comment at the top of
 *       the disabled code explains why.
 */
static void
order_then_probes(pe_working_set_t *data_set)
{
#if 0
    /* Given an ordering "A then B", we would prefer to wait for A to be started
     * before probing B.
     *
     * For example, if A is a filesystem which B can't even run without, it
     * would be helpful if the author of B's agent could assume that A is
     * running before B.monitor will be called.
     *
     * However, we can't _only_ probe after A is running, otherwise we wouldn't
     * detect the state of B if A could not be started. We can't even do an
     * opportunistic version of this, because B may be moving:
     *
     *   A.stop -> A.start -> B.probe -> B.stop -> B.start
     *
     * and if we add B.stop -> A.stop here, we get a loop:
     *
     *   A.stop -> A.start -> B.probe -> B.stop -> A.stop
     *
     * We could kill the "B.probe -> B.stop" dependency, but that could mean
     * stopping B "too" soon, because B.start must wait for the probe, and
     * we don't want to stop B if we can't start it.
     *
     * We could add the ordering only if A is an anonymous clone with
     * clone-max == node-max (since we'll never be moving it). However, we could
     * still be stopping one instance at the same time as starting another.
     *
     * The complexity of checking for allowed conditions combined with the ever
     * narrowing use case suggests that this code should remain disabled until
     * someone gets smarter.
     */
    for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
        pe_resource_t *rsc = (pe_resource_t *) iter->data;

        pe_action_t *start = NULL;
        GList *actions = NULL;
        GList *probes = NULL;

        // Only the first start action found for the resource is considered
        actions = pe__resource_actions(rsc, NULL, RSC_START, FALSE);

        if (actions) {
            start = actions->data;
            g_list_free(actions);
        }

        if (start == NULL) {
            crm_err("No start action for %s", rsc->id);
            continue;
        }

        /* NOTE(review): this list is never freed below; if this code is ever
         * re-enabled, it likely needs a g_list_free(probes) per iteration.
         */
        probes = pe__resource_actions(rsc, NULL, RSC_STATUS, FALSE);

        // Examine each action ordered before the start
        for (actions = start->actions_before; actions != NULL;
             actions = actions->next) {

            pe_action_wrapper_t *before = (pe_action_wrapper_t *) actions->data;

            pe_action_t *first = before->action;
            pe_resource_t *first_rsc = first->rsc;

            if (first->required_runnable_before) {
                // Use the resource of the first action ordered before "first"
                for (GList *clone_actions = first->actions_before;
                     clone_actions != NULL;
                     clone_actions = clone_actions->next) {

                    before = (pe_action_wrapper_t *) clone_actions->data;

                    crm_trace("Testing '%s then %s' for %s",
                              first->uuid, before->action->uuid, start->uuid);

                    CRM_ASSERT(before->action->rsc != NULL);
                    first_rsc = before->action->rsc;
                    break;
                }

            } else if (!pcmk__str_eq(first->task, RSC_START, pcmk__str_none)) {
                crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
            }

            if (first_rsc == NULL) {
                continue;

            } else if (uber_parent(first_rsc) == uber_parent(start->rsc)) {
                crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
                continue;

            } else if (!pe_rsc_is_clone(uber_parent(first_rsc))) {
                crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
                continue;
            }

            crm_err("Applying %s before %s %d", first->uuid, start->uuid,
                    uber_parent(first_rsc)->variant);

            // Order "first" before every probe of the resource being started
            for (GList *probe_iter = probes; probe_iter != NULL;
                 probe_iter = probe_iter->next) {

                pe_action_t *probe = (pe_action_t *) probe_iter->data;

                crm_err("Ordering %s before %s", first->uuid, probe->uuid);
                order_actions(first, probe, pe_order_optional);
            }
        }
    }
#endif
}
 758 
 759 void
 760 pcmk__order_probes(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 761 {
 762     // Add orderings for "probe then X"
 763     g_list_foreach(data_set->resources, (GFunc) add_restart_orderings_for_rsc,
 764                    data_set);
 765     add_probe_orderings_for_stops(data_set);
 766 
 767     order_then_probes(data_set);
 768 }
 769 
 770 /*!
 771  * \internal
 772  * \brief Schedule any probes needed
 773  *
 774  * \param[in] data_set  Cluster working set
 775  *
 776  * \note This may also schedule fencing of failed remote nodes.
 777  */
 778 void
 779 pcmk__schedule_probes(pe_working_set_t *data_set)
     /* [previous][next][first][last][top][bottom][index][help] */
 780 {
 781     // Schedule probes on each node in the cluster as needed
 782     for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
 783         pe_node_t *node = (pe_node_t *) iter->data;
 784         const char *probed = NULL;
 785 
 786         if (!node->details->online) { // Don't probe offline nodes
 787             if (pcmk__is_failed_remote_node(node)) {
 788                 pe_fence_node(data_set, node,
 789                               "the connection is unrecoverable", FALSE);
 790             }
 791             continue;
 792 
 793         } else if (node->details->unclean) { // ... or nodes that need fencing
 794             continue;
 795 
 796         } else if (!node->details->rsc_discovery_enabled) {
 797             // The user requested that probes not be done on this node
 798             continue;
 799         }
 800 
 801         /* This is no longer needed for live clusters, since the probe_complete
 802          * node attribute will never be in the CIB. However this is still useful
 803          * for processing old saved CIBs (< 1.1.14), including the
 804          * reprobe-target_rc regression test.
 805          */
 806         probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
 807         if (probed != NULL && crm_is_true(probed) == FALSE) {
 808             pe_action_t *probe_op = NULL;
 809 
 810             probe_op = custom_action(NULL,
 811                                      crm_strdup_printf("%s-%s", CRM_OP_REPROBE,
 812                                                        node->details->uname),
 813                                      CRM_OP_REPROBE, node, FALSE, TRUE,
 814                                      data_set);
 815             add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT,
 816                            XML_BOOLEAN_TRUE);
 817             continue;
 818         }
 819 
 820         // Probe each resource in the cluster on this node, as needed
 821         pcmk__probe_resource_list(data_set->resources, node);
 822     }
 823 }

/* [previous][next][first][last][top][bottom][index][help] */