root/lib/pacemaker/pcmk_sched_instances.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. can_run_instance
  2. ban_unavailable_allowed_nodes
  3. new_node_table
  4. apply_parent_colocations
  5. cmp_instance_by_colocation
  6. did_fail
  7. node_is_allowed
  8. pcmk__cmp_instance_number
  9. pcmk__cmp_instance
  10. increment_parent_count
  11. assign_instance
  12. assign_instance_early
  13. reset_allowed_node_counts
  14. preferred_node
  15. pcmk__assign_instances
  16. check_instance_state
  17. pcmk__create_instance_actions
  18. get_instance_list
  19. free_instance_list
  20. pcmk__instance_matches
  21. find_compatible_instance_on_node
  22. pcmk__find_compatible_instance
  23. unassign_if_mandatory
  24. find_instance_action
  25. orig_action_name
  26. update_interleaved_actions
  27. can_interleave_actions
  28. update_noninterleaved_actions
  29. pcmk__instance_update_ordered_actions
  30. pcmk__collective_action_flags

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 /* This file is intended for code usable with both clone instances and bundle
  11  * replica containers.
  12  */
  13 
  14 #include <crm_internal.h>
  15 #include <crm/common/xml.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Check whether a node is allowed to run an instance
  22  *
  23  * \param[in] instance      Clone instance or bundle container to check
  24  * \param[in] node          Node to check
  25  * \param[in] max_per_node  Maximum number of instances allowed to run on a node
  26  *
  27  * \return true if \p node is allowed to run \p instance, otherwise false
  28  */
  29 static bool
  30 can_run_instance(const pcmk_resource_t *instance, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
  31                  int max_per_node)
  32 {
  33     pcmk_node_t *allowed_node = NULL;
  34 
  35     if (pcmk_is_set(instance->flags, pcmk_rsc_removed)) {
  36         pcmk__rsc_trace(instance, "%s cannot run on %s: orphaned",
  37                         instance->id, pcmk__node_name(node));
  38         return false;
  39     }
  40 
  41     if (!pcmk__node_available(node, false, false)) {
  42         pcmk__rsc_trace(instance,
  43                         "%s cannot run on %s: node cannot run resources",
  44                         instance->id, pcmk__node_name(node));
  45         return false;
  46     }
  47 
  48     allowed_node = pcmk__top_allowed_node(instance, node);
  49     if (allowed_node == NULL) {
  50         crm_warn("%s cannot run on %s: node not allowed",
  51                  instance->id, pcmk__node_name(node));
  52         return false;
  53     }
  54 
  55     if (allowed_node->weight < 0) {
  56         pcmk__rsc_trace(instance,
  57                         "%s cannot run on %s: parent score is %s there",
  58                         instance->id, pcmk__node_name(node),
  59                         pcmk_readable_score(allowed_node->weight));
  60         return false;
  61     }
  62 
  63     if (allowed_node->count >= max_per_node) {
  64         pcmk__rsc_trace(instance,
  65                         "%s cannot run on %s: node already has %d instance%s",
  66                         instance->id, pcmk__node_name(node), max_per_node,
  67                         pcmk__plural_s(max_per_node));
  68         return false;
  69     }
  70 
  71     pcmk__rsc_trace(instance, "%s can run on %s (%d already running)",
  72                     instance->id, pcmk__node_name(node), allowed_node->count);
  73     return true;
  74 }
  75 
  76 /*!
  77  * \internal
  78  * \brief Ban a clone instance or bundle replica from unavailable allowed nodes
  79  *
  80  * \param[in,out] instance      Clone instance or bundle replica to ban
  81  * \param[in]     max_per_node  Maximum instances allowed to run on a node
  82  */
  83 static void
  84 ban_unavailable_allowed_nodes(pcmk_resource_t *instance, int max_per_node)
     /* [previous][next][first][last][top][bottom][index][help] */
  85 {
  86     if (instance->allowed_nodes != NULL) {
  87         GHashTableIter iter;
  88         pcmk_node_t *node = NULL;
  89 
  90         g_hash_table_iter_init(&iter, instance->allowed_nodes);
  91         while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
  92             if (!can_run_instance(instance, node, max_per_node)) {
  93                 pcmk__rsc_trace(instance, "Banning %s from unavailable node %s",
  94                                 instance->id, pcmk__node_name(node));
  95                 node->weight = -PCMK_SCORE_INFINITY;
  96                 for (GList *child_iter = instance->children;
  97                      child_iter != NULL; child_iter = child_iter->next) {
  98                     pcmk_resource_t *child = child_iter->data;
  99                     pcmk_node_t *child_node = NULL;
 100 
 101                     child_node = g_hash_table_lookup(child->allowed_nodes,
 102                                                      node->details->id);
 103                     if (child_node != NULL) {
 104                         pcmk__rsc_trace(instance,
 105                                         "Banning %s child %s "
 106                                         "from unavailable node %s",
 107                                         instance->id, child->id,
 108                                         pcmk__node_name(node));
 109                         child_node->weight = -PCMK_SCORE_INFINITY;
 110                     }
 111                 }
 112             }
 113         }
 114     }
 115 }
 116 
 117 /*!
 118  * \internal
 119  * \brief Create a hash table with a single node in it
 120  *
 121  * \param[in] node  Node to copy into new table
 122  *
 123  * \return Newly created hash table containing a copy of \p node
 124  * \note The caller is responsible for freeing the result with
 125  *       g_hash_table_destroy().
 126  */
 127 static GHashTable *
 128 new_node_table(pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 129 {
 130     GHashTable *table = pcmk__strkey_table(NULL, free);
 131 
 132     node = pe__copy_node(node);
 133     g_hash_table_insert(table, (gpointer) node->details->id, node);
 134     return table;
 135 }
 136 
 137 /*!
 138  * \internal
 139  * \brief Apply a resource's parent's colocation scores to a node table
 140  *
 141  * \param[in]     rsc    Resource whose colocations should be applied
 142  * \param[in,out] nodes  Node table to apply colocations to
 143  */
 144 static void
 145 apply_parent_colocations(const pcmk_resource_t *rsc, GHashTable **nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
 146 {
 147     GList *colocations = pcmk__this_with_colocations(rsc);
 148 
 149     for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
 150         const pcmk__colocation_t *colocation = iter->data;
 151         pcmk_resource_t *other = colocation->primary;
 152         float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
 153 
 154         other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
 155                                                colocation, factor,
 156                                                pcmk__coloc_select_default);
 157     }
 158     g_list_free(colocations);
 159     colocations = pcmk__with_this_colocations(rsc);
 160 
 161     for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
 162         const pcmk__colocation_t *colocation = iter->data;
 163         pcmk_resource_t *other = colocation->dependent;
 164         float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
 165 
 166         if (!pcmk__colocation_has_influence(colocation, rsc)) {
 167             continue;
 168         }
 169         other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
 170                                                colocation, factor,
 171                                                pcmk__coloc_select_nonnegative);
 172     }
 173     g_list_free(colocations);
 174 }
 175 
 176 /*!
 177  * \internal
 178  * \brief Compare clone or bundle instances based on colocation scores
 179  *
 180  * Determine the relative order in which two clone or bundle instances should be
 181  * assigned to nodes, considering the scores of colocation constraints directly
 182  * or indirectly involving them.
 183  *
 184  * \param[in] instance1  First instance to compare
 185  * \param[in] instance2  Second instance to compare
 186  *
 187  * \return A negative number if \p instance1 should be assigned first,
 188  *         a positive number if \p instance2 should be assigned first,
 189  *         or 0 if assignment order doesn't matter
 190  */
 191 static int
 192 cmp_instance_by_colocation(const pcmk_resource_t *instance1,
     /* [previous][next][first][last][top][bottom][index][help] */
 193                            const pcmk_resource_t *instance2)
 194 {
 195     int rc = 0;
 196     pcmk_node_t *node1 = NULL;
 197     pcmk_node_t *node2 = NULL;
 198     pcmk_node_t *current_node1 = pcmk__current_node(instance1);
 199     pcmk_node_t *current_node2 = pcmk__current_node(instance2);
 200     GHashTable *colocated_scores1 = NULL;
 201     GHashTable *colocated_scores2 = NULL;
 202 
 203     pcmk__assert((instance1 != NULL) && (instance1->parent != NULL)
 204                  && (instance2 != NULL) && (instance2->parent != NULL)
 205                  && (current_node1 != NULL) && (current_node2 != NULL));
 206 
 207     // Create node tables initialized with each node
 208     colocated_scores1 = new_node_table(current_node1);
 209     colocated_scores2 = new_node_table(current_node2);
 210 
 211     // Apply parental colocations
 212     apply_parent_colocations(instance1, &colocated_scores1);
 213     apply_parent_colocations(instance2, &colocated_scores2);
 214 
 215     // Find original nodes again, with scores updated for colocations
 216     node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
 217     node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
 218 
 219     // Compare nodes by updated scores
 220     if (node1->weight < node2->weight) {
 221         crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
 222                   instance1->id, node1->weight, pcmk__node_name(node1),
 223                   instance2->id, node2->weight, pcmk__node_name(node2));
 224         rc = 1;
 225 
 226     } else if (node1->weight > node2->weight) {
 227         crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
 228                   instance1->id, node1->weight, pcmk__node_name(node1),
 229                   instance2->id, node2->weight, pcmk__node_name(node2));
 230         rc = -1;
 231     }
 232 
 233     g_hash_table_destroy(colocated_scores1);
 234     g_hash_table_destroy(colocated_scores2);
 235     return rc;
 236 }
 237 
 238 /*!
 239  * \internal
 240  * \brief Check whether a resource or any of its children are failed
 241  *
 242  * \param[in] rsc  Resource to check
 243  *
 244  * \return true if \p rsc or any of its children are failed, otherwise false
 245  */
 246 static bool
 247 did_fail(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 248 {
 249     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 250         return true;
 251     }
 252     for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 253         if (did_fail((const pcmk_resource_t *) iter->data)) {
 254             return true;
 255         }
 256     }
 257     return false;
 258 }
 259 
 260 /*!
 261  * \internal
 262  * \brief Check whether a node is allowed to run a resource
 263  *
 264  * \param[in]     rsc   Resource to check
 265  * \param[in,out] node  Node to check (will be set NULL if not allowed)
 266  *
 267  * \return true if *node is either NULL or allowed for \p rsc, otherwise false
 268  */
 269 static bool
 270 node_is_allowed(const pcmk_resource_t *rsc, pcmk_node_t **node)
     /* [previous][next][first][last][top][bottom][index][help] */
 271 {
 272     if (*node != NULL) {
 273         pcmk_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
 274                                                    (*node)->details->id);
 275 
 276         if ((allowed == NULL) || (allowed->weight < 0)) {
 277             pcmk__rsc_trace(rsc, "%s: current location (%s) is unavailable",
 278                             rsc->id, pcmk__node_name(*node));
 279             *node = NULL;
 280             return false;
 281         }
 282     }
 283     return true;
 284 }
 285 
 286 /*!
 287  * \internal
 288  * \brief Compare two clone or bundle instances' instance numbers
 289  *
 290  * \param[in] a  First instance to compare
 291  * \param[in] b  Second instance to compare
 292  *
 293  * \return A negative number if \p a's instance number is lower,
 294  *         a positive number if \p b's instance number is lower,
 295  *         or 0 if their instance numbers are the same
 296  */
 297 gint
 298 pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 299 {
 300     const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
 301     const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
 302     char *div1 = NULL;
 303     char *div2 = NULL;
 304 
 305     pcmk__assert((instance1 != NULL) && (instance2 != NULL));
 306 
 307     // Clone numbers are after a colon, bundle numbers after a dash
 308     div1 = strrchr(instance1->id, ':');
 309     if (div1 == NULL) {
 310         div1 = strrchr(instance1->id, '-');
 311     }
 312     div2 = strrchr(instance2->id, ':');
 313     if (div2 == NULL) {
 314         div2 = strrchr(instance2->id, '-');
 315     }
 316     pcmk__assert((div1 != NULL) && (div2 != NULL));
 317 
 318     return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
 319 }
 320 
 321 /*!
 322  * \internal
 323  * \brief Compare clone or bundle instances according to assignment order
 324  *
 325  * Compare two clone or bundle instances according to the order they should be
 326  * assigned to nodes, preferring (in order):
 327  *
 328  *  - Active instance that is less multiply active
 329  *  - Instance that is not active on a disallowed node
 330  *  - Instance with higher configured priority
 331  *  - Active instance whose current node can run resources
 332  *  - Active instance whose parent is allowed on current node
 333  *  - Active instance whose current node has fewer other instances
 334  *  - Active instance
 335  *  - Instance that isn't failed
 336  *  - Instance whose colocations result in higher score on current node
 337  *  - Instance with lower ID in lexicographic order
 338  *
 339  * \param[in] a          First instance to compare
 340  * \param[in] b          Second instance to compare
 341  *
 342  * \return A negative number if \p a should be assigned first,
 343  *         a positive number if \p b should be assigned first,
 344  *         or 0 if assignment order doesn't matter
 345  */
 346 gint
 347 pcmk__cmp_instance(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 348 {
 349     int rc = 0;
 350     pcmk_node_t *node1 = NULL;
 351     pcmk_node_t *node2 = NULL;
 352     unsigned int nnodes1 = 0;
 353     unsigned int nnodes2 = 0;
 354 
 355     bool can1 = true;
 356     bool can2 = true;
 357 
 358     const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
 359     const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
 360 
 361     pcmk__assert((instance1 != NULL) && (instance2 != NULL));
 362 
 363     node1 = instance1->fns->active_node(instance1, &nnodes1, NULL);
 364     node2 = instance2->fns->active_node(instance2, &nnodes2, NULL);
 365 
 366     /* If both instances are running and at least one is multiply
 367      * active, prefer instance that's running on fewer nodes.
 368      */
 369     if ((nnodes1 > 0) && (nnodes2 > 0)) {
 370         if (nnodes1 < nnodes2) {
 371             crm_trace("Assign %s (active on %d) before %s (active on %d): "
 372                       "less multiply active",
 373                       instance1->id, nnodes1, instance2->id, nnodes2);
 374             return -1;
 375 
 376         } else if (nnodes1 > nnodes2) {
 377             crm_trace("Assign %s (active on %d) after %s (active on %d): "
 378                       "more multiply active",
 379                       instance1->id, nnodes1, instance2->id, nnodes2);
 380             return 1;
 381         }
 382     }
 383 
 384     /* An instance that is either inactive or active on an allowed node is
 385      * preferred over an instance that is active on a no-longer-allowed node.
 386      */
 387     can1 = node_is_allowed(instance1, &node1);
 388     can2 = node_is_allowed(instance2, &node2);
 389     if (can1 && !can2) {
 390         crm_trace("Assign %s before %s: not active on a disallowed node",
 391                   instance1->id, instance2->id);
 392         return -1;
 393 
 394     } else if (!can1 && can2) {
 395         crm_trace("Assign %s after %s: active on a disallowed node",
 396                   instance1->id, instance2->id);
 397         return 1;
 398     }
 399 
 400     // Prefer instance with higher configured priority
 401     if (instance1->priority > instance2->priority) {
 402         crm_trace("Assign %s before %s: priority (%d > %d)",
 403                   instance1->id, instance2->id,
 404                   instance1->priority, instance2->priority);
 405         return -1;
 406 
 407     } else if (instance1->priority < instance2->priority) {
 408         crm_trace("Assign %s after %s: priority (%d < %d)",
 409                   instance1->id, instance2->id,
 410                   instance1->priority, instance2->priority);
 411         return 1;
 412     }
 413 
 414     // Prefer active instance
 415     if ((node1 == NULL) && (node2 == NULL)) {
 416         crm_trace("No assignment preference for %s vs. %s: inactive",
 417                   instance1->id, instance2->id);
 418         return 0;
 419 
 420     } else if (node1 == NULL) {
 421         crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
 422         return 1;
 423 
 424     } else if (node2 == NULL) {
 425         crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
 426         return -1;
 427     }
 428 
 429     // Prefer instance whose current node can run resources
 430     can1 = pcmk__node_available(node1, false, false);
 431     can2 = pcmk__node_available(node2, false, false);
 432     if (can1 && !can2) {
 433         crm_trace("Assign %s before %s: current node can run resources",
 434                   instance1->id, instance2->id);
 435         return -1;
 436 
 437     } else if (!can1 && can2) {
 438         crm_trace("Assign %s after %s: current node can't run resources",
 439                   instance1->id, instance2->id);
 440         return 1;
 441     }
 442 
 443     // Prefer instance whose parent is allowed to run on instance's current node
 444     node1 = pcmk__top_allowed_node(instance1, node1);
 445     node2 = pcmk__top_allowed_node(instance2, node2);
 446     if ((node1 == NULL) && (node2 == NULL)) {
 447         crm_trace("No assignment preference for %s vs. %s: "
 448                   "parent not allowed on either instance's current node",
 449                   instance1->id, instance2->id);
 450         return 0;
 451 
 452     } else if (node1 == NULL) {
 453         crm_trace("Assign %s after %s: parent not allowed on current node",
 454                   instance1->id, instance2->id);
 455         return 1;
 456 
 457     } else if (node2 == NULL) {
 458         crm_trace("Assign %s before %s: parent allowed on current node",
 459                   instance1->id, instance2->id);
 460         return -1;
 461     }
 462 
 463     // Prefer instance whose current node is running fewer other instances
 464     if (node1->count < node2->count) {
 465         crm_trace("Assign %s before %s: fewer active instances on current node",
 466                   instance1->id, instance2->id);
 467         return -1;
 468 
 469     } else if (node1->count > node2->count) {
 470         crm_trace("Assign %s after %s: more active instances on current node",
 471                   instance1->id, instance2->id);
 472         return 1;
 473     }
 474 
 475     // Prefer instance that isn't failed
 476     can1 = did_fail(instance1);
 477     can2 = did_fail(instance2);
 478     if (!can1 && can2) {
 479         crm_trace("Assign %s before %s: not failed",
 480                   instance1->id, instance2->id);
 481         return -1;
 482     } else if (can1 && !can2) {
 483         crm_trace("Assign %s after %s: failed",
 484                   instance1->id, instance2->id);
 485         return 1;
 486     }
 487 
 488     // Prefer instance with higher cumulative colocation score on current node
 489     rc = cmp_instance_by_colocation(instance1, instance2);
 490     if (rc != 0) {
 491         return rc;
 492     }
 493 
 494     // Prefer instance with lower instance number
 495     rc = pcmk__cmp_instance_number(instance1, instance2);
 496     if (rc < 0) {
 497         crm_trace("Assign %s before %s: instance number",
 498                   instance1->id, instance2->id);
 499     } else if (rc > 0) {
 500         crm_trace("Assign %s after %s: instance number",
 501                   instance1->id, instance2->id);
 502     } else {
 503         crm_trace("No assignment preference for %s vs. %s",
 504                   instance1->id, instance2->id);
 505     }
 506     return rc;
 507 }
 508 
 509 /*!
 510  * \internal
 511  * \brief Increment the parent's instance count after assigning an instance
 512  *
 513  * An instance's parent tracks how many instances have been assigned to each
 514  * node via its pcmk_node_t:count member. After assigning an instance to a node,
 515  * find the corresponding node in the parent's allowed table and increment it.
 516  *
 517  * \param[in,out] instance     Instance whose parent to update
 518  * \param[in]     assigned_to  Node to which the instance was assigned
 519  */
 520 static void
 521 increment_parent_count(pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
 522                        const pcmk_node_t *assigned_to)
 523 {
 524     pcmk_node_t *allowed = NULL;
 525 
 526     if (assigned_to == NULL) {
 527         return;
 528     }
 529     allowed = pcmk__top_allowed_node(instance, assigned_to);
 530 
 531     if (allowed == NULL) {
 532         /* The instance is allowed on the node, but its parent isn't. This
 533          * shouldn't be possible if the resource is managed, and we won't be
 534          * able to limit the number of instances assigned to the node.
 535          */
 536         CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pcmk_rsc_managed));
 537 
 538     } else {
 539         allowed->count++;
 540     }
 541 }
 542 
 543 /*!
 544  * \internal
 545  * \brief Assign an instance to a node
 546  *
 547  * \param[in,out] instance      Clone instance or bundle replica container
 548  * \param[in]     prefer        If not NULL, attempt early assignment to this
 549  *                              node, if still the best choice; otherwise,
 550  *                              perform final assignment
 551  * \param[in]     max_per_node  Assign at most this many instances to one node
 552  *
 553  * \return Node to which \p instance is assigned
 554  */
 555 static const pcmk_node_t *
 556 assign_instance(pcmk_resource_t *instance, const pcmk_node_t *prefer,
     /* [previous][next][first][last][top][bottom][index][help] */
 557                 int max_per_node)
 558 {
 559     pcmk_node_t *chosen = NULL;
 560 
 561     pcmk__rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
 562                     ((prefer == NULL)? "no node" : prefer->details->uname));
 563 
 564     if (pcmk_is_set(instance->flags, pcmk_rsc_assigning)) {
 565         pcmk__rsc_debug(instance,
 566                         "Assignment loop detected involving %s colocations",
 567                         instance->id);
 568         return NULL;
 569     }
 570     ban_unavailable_allowed_nodes(instance, max_per_node);
 571 
 572     // Failed early assignments are reversible (stop_if_fail=false)
 573     chosen = instance->cmds->assign(instance, prefer, (prefer == NULL));
 574     increment_parent_count(instance, chosen);
 575     return chosen;
 576 }
 577 
 578 /*!
 579  * \internal
 580  * \brief Try to assign an instance to its current node early
 581  *
 582  * \param[in] rsc           Clone or bundle being assigned (for logs only)
 583  * \param[in] instance      Clone instance or bundle replica container
 584  * \param[in] current       Instance's current node
 585  * \param[in] max_per_node  Maximum number of instances per node
 586  * \param[in] available     Number of instances still available for assignment
 587  *
 588  * \return \c true if \p instance was successfully assigned to its current node,
 589  *         or \c false otherwise
 590  */
 591 static bool
 592 assign_instance_early(const pcmk_resource_t *rsc, pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
 593                       const pcmk_node_t *current, int max_per_node,
 594                       int available)
 595 {
 596     const pcmk_node_t *chosen = NULL;
 597     int reserved = 0;
 598 
 599     pcmk_resource_t *parent = instance->parent;
 600     GHashTable *allowed_orig = NULL;
 601     GHashTable *allowed_orig_parent = parent->allowed_nodes;
 602     const pcmk_node_t *allowed_node = NULL;
 603 
 604     pcmk__rsc_trace(instance, "Trying to assign %s to its current node %s",
 605                     instance->id, pcmk__node_name(current));
 606 
 607     allowed_node = g_hash_table_lookup(instance->allowed_nodes,
 608                                        current->details->id);
 609     if (!pcmk__node_available(allowed_node, true, false)) {
 610         pcmk__rsc_info(instance,
 611                        "Not assigning %s to current node %s: unavailable",
 612                        instance->id, pcmk__node_name(current));
 613         return false;
 614     }
 615 
 616     /* On each iteration, if instance gets assigned to a node other than its
 617      * current one, we reserve one instance for the chosen node, unassign
 618      * instance, restore instance's original node tables, and try again. This
 619      * way, instances are proportionally assigned to nodes based on preferences,
 620      * but shuffling of specific instances is minimized. If a node will be
 621      * assigned instances at all, it preferentially receives instances that are
 622      * currently active there.
 623      *
 624      * parent->allowed_nodes tracks the number of instances assigned to each
 625      * node. If a node already has max_per_node instances assigned,
 626      * ban_unavailable_allowed_nodes() marks it as unavailable.
 627      *
 628      * In the end, we restore the original parent->allowed_nodes to undo the
 629      * changes to counts during tentative assignments. If we successfully
 630      * assigned instance to its current node, we increment that node's counter.
 631      */
 632 
 633     // Back up the allowed node tables of instance and its children recursively
 634     pcmk__copy_node_tables(instance, &allowed_orig);
 635 
 636     // Update instances-per-node counts in a scratch table
 637     parent->allowed_nodes = pcmk__copy_node_table(parent->allowed_nodes);
 638 
 639     while (reserved < available) {
 640         chosen = assign_instance(instance, current, max_per_node);
 641 
 642         if (pcmk__same_node(chosen, current)) {
 643             // Successfully assigned to current node
 644             break;
 645         }
 646 
 647         // Assignment updates scores, so restore to original state
 648         pcmk__rsc_debug(instance, "Rolling back node scores for %s",
 649                         instance->id);
 650         pcmk__restore_node_tables(instance, allowed_orig);
 651 
 652         if (chosen == NULL) {
 653             // Assignment failed, so give up
 654             pcmk__rsc_info(instance,
 655                            "Not assigning %s to current node %s: unavailable",
 656                            instance->id, pcmk__node_name(current));
 657             pcmk__set_rsc_flags(instance, pcmk_rsc_unassigned);
 658             break;
 659         }
 660 
 661         // We prefer more strongly to assign an instance to the chosen node
 662         pcmk__rsc_debug(instance,
 663                         "Not assigning %s to current node %s: %s is better",
 664                         instance->id, pcmk__node_name(current),
 665                         pcmk__node_name(chosen));
 666 
 667         // Reserve one instance for the chosen node and try again
 668         if (++reserved >= available) {
 669             pcmk__rsc_info(instance,
 670                            "Not assigning %s to current node %s: "
 671                            "other assignments are more important",
 672                            instance->id, pcmk__node_name(current));
 673 
 674         } else {
 675             pcmk__rsc_debug(instance,
 676                             "Reserved an instance of %s for %s. Retrying "
 677                             "assignment of %s to %s",
 678                             rsc->id, pcmk__node_name(chosen), instance->id,
 679                             pcmk__node_name(current));
 680         }
 681 
 682         // Clear this assignment (frees chosen); leave instance counts in parent
 683         pcmk__unassign_resource(instance);
 684         chosen = NULL;
 685     }
 686 
 687     g_hash_table_destroy(allowed_orig);
 688 
 689     // Restore original instances-per-node counts
 690     g_hash_table_destroy(parent->allowed_nodes);
 691     parent->allowed_nodes = allowed_orig_parent;
 692 
 693     if (chosen == NULL) {
 694         // Couldn't assign instance to current node
 695         return false;
 696     }
 697     pcmk__rsc_trace(instance, "Assigned %s to current node %s",
 698                     instance->id, pcmk__node_name(current));
 699     increment_parent_count(instance, chosen);
 700     return true;
 701 }
 702 
 703 /*!
 704  * \internal
 705  * \brief Reset the node counts of a resource's allowed nodes to zero
 706  *
 707  * \param[in,out] rsc  Resource to reset
 708  *
 709  * \return Number of nodes that are available to run resources
 710  */
 711 static unsigned int
 712 reset_allowed_node_counts(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 713 {
 714     unsigned int available_nodes = 0;
 715     pcmk_node_t *node = NULL;
 716     GHashTableIter iter;
 717 
 718     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
 719     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 720         node->count = 0;
 721         if (pcmk__node_available(node, false, false)) {
 722             available_nodes++;
 723         }
 724     }
 725     return available_nodes;
 726 }
 727 
 728 /*!
 729  * \internal
 730  * \brief Check whether an instance has a preferred node
 731  *
 732  * \param[in] instance          Clone instance or bundle replica container
 733  * \param[in] optimal_per_node  Optimal number of instances per node
 734  *
 735  * \return Instance's current node if still available, otherwise NULL
 736  */
 737 static const pcmk_node_t *
 738 preferred_node(const pcmk_resource_t *instance, int optimal_per_node)
     /* [previous][next][first][last][top][bottom][index][help] */
 739 {
 740     const pcmk_node_t *node = NULL;
 741     const pcmk_node_t *parent_node = NULL;
 742 
 743     // Check whether instance is active, healthy, and not yet assigned
 744     if ((instance->running_on == NULL)
 745         || !pcmk_is_set(instance->flags, pcmk_rsc_unassigned)
 746         || pcmk_is_set(instance->flags, pcmk_rsc_failed)) {
 747         return NULL;
 748     }
 749 
 750     // Check whether instance's current node can run resources
 751     node = pcmk__current_node(instance);
 752     if (!pcmk__node_available(node, true, false)) {
 753         pcmk__rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
 754                         instance->id, pcmk__node_name(node));
 755         return NULL;
 756     }
 757 
 758     // Check whether node already has optimal number of instances assigned
 759     parent_node = pcmk__top_allowed_node(instance, node);
 760     if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) {
 761         pcmk__rsc_trace(instance,
 762                         "Not assigning %s to %s early "
 763                         "(optimal instances already assigned)",
 764                         instance->id, pcmk__node_name(node));
 765         return NULL;
 766     }
 767 
 768     return node;
 769 }
 770 
 771 /*!
 772  * \internal
 773  * \brief Assign collective instances to nodes
 774  *
 775  * \param[in,out] collective    Clone or bundle resource being assigned
 776  * \param[in,out] instances     List of clone instances or bundle containers
 777  * \param[in]     max_total     Maximum instances to assign in total
 778  * \param[in]     max_per_node  Maximum instances to assign to any one node
 779  */
 780 void
 781 pcmk__assign_instances(pcmk_resource_t *collective, GList *instances,
     /* [previous][next][first][last][top][bottom][index][help] */
 782                        int max_total, int max_per_node)
 783 {
 784     // Reuse node count to track number of assigned instances
 785     unsigned int available_nodes = reset_allowed_node_counts(collective);
 786 
 787     int optimal_per_node = 0;
 788     int assigned = 0;
 789     GList *iter = NULL;
 790     pcmk_resource_t *instance = NULL;
 791     const pcmk_node_t *current = NULL;
 792 
 793     if (available_nodes > 0) {
 794         optimal_per_node = max_total / available_nodes;
 795     }
 796     if (optimal_per_node < 1) {
 797         optimal_per_node = 1;
 798     }
 799 
 800     pcmk__rsc_debug(collective,
 801                     "Assigning up to %d %s instance%s to up to %u node%s "
 802                     "(at most %d per host, %d optimal)",
 803                     max_total, collective->id, pcmk__plural_s(max_total),
 804                     available_nodes, pcmk__plural_s(available_nodes),
 805                     max_per_node, optimal_per_node);
 806 
 807     // Assign as many instances as possible to their current location
 808     for (iter = instances; (iter != NULL) && (assigned < max_total);
 809          iter = iter->next) {
 810         int available = max_total - assigned;
 811 
 812         instance = iter->data;
 813         if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
 814             continue;   // Already assigned
 815         }
 816 
 817         current = preferred_node(instance, optimal_per_node);
 818         if ((current != NULL)
 819             && assign_instance_early(collective, instance, current,
 820                                      max_per_node, available)) {
 821             assigned++;
 822         }
 823     }
 824 
 825     pcmk__rsc_trace(collective, "Assigned %d of %d instance%s to current node",
 826                     assigned, max_total, pcmk__plural_s(max_total));
 827 
 828     for (iter = instances; iter != NULL; iter = iter->next) {
 829         instance = (pcmk_resource_t *) iter->data;
 830 
 831         if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
 832             continue; // Already assigned
 833         }
 834 
 835         if (instance->running_on != NULL) {
 836             current = pcmk__current_node(instance);
 837             if (pcmk__top_allowed_node(instance, current) == NULL) {
 838                 const char *unmanaged = "";
 839 
 840                 if (!pcmk_is_set(instance->flags, pcmk_rsc_managed)) {
 841                     unmanaged = "Unmanaged resource ";
 842                 }
 843                 crm_notice("%s%s is running on %s which is no longer allowed",
 844                            unmanaged, instance->id, pcmk__node_name(current));
 845             }
 846         }
 847 
 848         if (assigned >= max_total) {
 849             pcmk__rsc_debug(collective,
 850                             "Not assigning %s because maximum %d instances "
 851                             "already assigned",
 852                             instance->id, max_total);
 853             resource_location(instance, NULL, -PCMK_SCORE_INFINITY,
 854                               "collective_limit_reached", collective->cluster);
 855 
 856         } else if (assign_instance(instance, NULL, max_per_node) != NULL) {
 857             assigned++;
 858         }
 859     }
 860 
 861     pcmk__rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
 862                     assigned, max_total, pcmk__plural_s(max_total),
 863                     collective->id);
 864 }
 865 
 866 enum instance_state {
 867     instance_starting   = (1 << 0),
 868     instance_stopping   = (1 << 1),
 869 
 870     /* This indicates that some instance is restarting. It's not the same as
 871      * instance_starting|instance_stopping, which would indicate that some
 872      * instance is starting, and some instance (not necessarily the same one) is
 873      * stopping.
 874      */
 875     instance_restarting = (1 << 2),
 876 
 877     instance_active     = (1 << 3),
 878 
 879     instance_all        = instance_starting|instance_stopping
 880                           |instance_restarting|instance_active,
 881 };
 882 
 883 /*!
 884  * \internal
 885  * \brief Check whether an instance is active, starting, and/or stopping
 886  *
 887  * \param[in]     instance  Clone instance or bundle replica container
 888  * \param[in,out] state     Whether any instance is starting, stopping, etc.
 889  */
 890 static void
 891 check_instance_state(const pcmk_resource_t *instance, uint32_t *state)
     /* [previous][next][first][last][top][bottom][index][help] */
 892 {
 893     const GList *iter = NULL;
 894     uint32_t instance_state = 0; // State of just this instance
 895 
 896     // No need to check further if all conditions have already been detected
 897     if (pcmk_all_flags_set(*state, instance_all)) {
 898         return;
 899     }
 900 
 901     // If instance is a collective (a cloned group), check its children instead
 902     if (instance->variant > pcmk_rsc_variant_primitive) {
 903         for (iter = instance->children;
 904              (iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
 905              iter = iter->next) {
 906             check_instance_state((const pcmk_resource_t *) iter->data, state);
 907         }
 908         return;
 909     }
 910 
 911     // If we get here, instance is a primitive
 912 
 913     if (instance->running_on != NULL) {
 914         instance_state |= instance_active;
 915     }
 916 
 917     // Check each of the instance's actions for runnable start or stop
 918     for (iter = instance->actions;
 919          (iter != NULL) && !pcmk_all_flags_set(instance_state,
 920                                                instance_starting
 921                                                |instance_stopping);
 922          iter = iter->next) {
 923 
 924         const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
 925         const bool optional = pcmk_is_set(action->flags, pcmk_action_optional);
 926 
 927         if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) {
 928             if (!optional
 929                 && pcmk_is_set(action->flags, pcmk_action_runnable)) {
 930 
 931                 pcmk__rsc_trace(instance, "Instance is starting due to %s",
 932                                 action->uuid);
 933                 instance_state |= instance_starting;
 934             } else {
 935                 pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)",
 936                                 action->uuid, instance->id,
 937                                 (optional? "optional" : "unrunnable"));
 938             }
 939 
 940         } else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task,
 941                                 pcmk__str_none)) {
 942             /* Only stop actions can be pseudo-actions for primitives. That
 943              * indicates that the node they are on is being fenced, so the stop
 944              * is implied rather than actually executed.
 945              */
 946             if (!optional
 947                 && pcmk_any_flags_set(action->flags, pcmk_action_pseudo
 948                                                      |pcmk_action_runnable)) {
 949                 pcmk__rsc_trace(instance, "Instance is stopping due to %s",
 950                                 action->uuid);
 951                 instance_state |= instance_stopping;
 952             } else {
 953                 pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)",
 954                                 action->uuid, instance->id,
 955                                 (optional? "optional" : "unrunnable"));
 956             }
 957         }
 958     }
 959 
 960     if (pcmk_all_flags_set(instance_state,
 961                            instance_starting|instance_stopping)) {
 962         instance_state |= instance_restarting;
 963     }
 964     *state |= instance_state;
 965 }
 966 
 967 /*!
 968  * \internal
 969  * \brief Create actions for collective resource instances
 970  *
 971  * \param[in,out] collective    Clone or bundle resource to create actions for
 972  * \param[in,out] instances     List of clone instances or bundle containers
 973  */
 974 void
 975 pcmk__create_instance_actions(pcmk_resource_t *collective, GList *instances)
     /* [previous][next][first][last][top][bottom][index][help] */
 976 {
 977     uint32_t state = 0;
 978 
 979     pcmk_action_t *stop = NULL;
 980     pcmk_action_t *stopped = NULL;
 981 
 982     pcmk_action_t *start = NULL;
 983     pcmk_action_t *started = NULL;
 984 
 985     pcmk__rsc_trace(collective, "Creating collective instance actions for %s",
 986                     collective->id);
 987 
 988     // Create actions for each instance appropriate to its variant
 989     for (GList *iter = instances; iter != NULL; iter = iter->next) {
 990         pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
 991 
 992         instance->cmds->create_actions(instance);
 993         check_instance_state(instance, &state);
 994     }
 995 
 996     // Create pseudo-actions for rsc start and started
 997     start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START,
 998                                       !pcmk_is_set(state, instance_starting),
 999                                       true);
1000     started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING,
1001                                         !pcmk_is_set(state, instance_starting),
1002                                         false);
1003     started->priority = PCMK_SCORE_INFINITY;
1004     if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
1005         pcmk__set_action_flags(started, pcmk_action_runnable);
1006     }
1007 
1008     // Create pseudo-actions for rsc stop and stopped
1009     stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP,
1010                                      !pcmk_is_set(state, instance_stopping),
1011                                      true);
1012     stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED,
1013                                         !pcmk_is_set(state, instance_stopping),
1014                                         true);
1015     stopped->priority = PCMK_SCORE_INFINITY;
1016     if (!pcmk_is_set(state, instance_restarting)) {
1017         pcmk__set_action_flags(stop, pcmk_action_migratable);
1018     }
1019 
1020     if (pcmk__is_clone(collective)) {
1021         pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
1022                                           stopped);
1023     }
1024 }
1025 
1026 /*!
1027  * \internal
1028  * \brief Get a list of clone instances or bundle replica containers
1029  *
1030  * \param[in] rsc  Clone or bundle resource
1031  *
1032  * \return Clone instances if \p rsc is a clone, or a newly created list of
1033  *         \p rsc's replica containers if \p rsc is a bundle
1034  * \note The caller must call free_instance_list() on the result when the list
1035  *       is no longer needed.
1036  */
1037 static inline GList *
1038 get_instance_list(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1039 {
1040     if (pcmk__is_bundle(rsc)) {
1041         return pe__bundle_containers(rsc);
1042     } else {
1043         return rsc->children;
1044     }
1045 }
1046 
1047 /*!
1048  * \internal
1049  * \brief Free any memory created by get_instance_list()
1050  *
1051  * \param[in]     rsc   Clone or bundle resource passed to get_instance_list()
1052  * \param[in,out] list  Return value of get_instance_list() for \p rsc
1053  */
1054 static inline void
1055 free_instance_list(const pcmk_resource_t *rsc, GList *list)
     /* [previous][next][first][last][top][bottom][index][help] */
1056 {
1057     if (list != rsc->children) {
1058         g_list_free(list);
1059     }
1060 }
1061 
1062 /*!
1063  * \internal
1064  * \brief Check whether an instance is compatible with a role and node
1065  *
1066  * \param[in] instance  Clone instance or bundle replica container
1067  * \param[in] node      Instance must match this node
1068  * \param[in] role      If not pcmk_role_unknown, instance must match this role
1069  * \param[in] current   If true, compare instance's original node and role,
1070  *                      otherwise compare assigned next node and role
1071  *
1072  * \return true if \p instance is compatible with \p node and \p role,
1073  *         otherwise false
1074  */
1075 bool
1076 pcmk__instance_matches(const pcmk_resource_t *instance, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
1077                        enum rsc_role_e role, bool current)
1078 {
1079     pcmk_node_t *instance_node = NULL;
1080 
1081     CRM_CHECK((instance != NULL) && (node != NULL), return false);
1082 
1083     if ((role != pcmk_role_unknown)
1084         && (role != instance->fns->state(instance, current))) {
1085         pcmk__rsc_trace(instance,
1086                         "%s is not a compatible instance (role is not %s)",
1087                         instance->id, pcmk_role_text(role));
1088         return false;
1089     }
1090 
1091     if (!is_set_recursive(instance, pcmk_rsc_blocked, true)) {
1092         // We only want instances that haven't failed
1093         instance_node = instance->fns->location(instance, NULL, current);
1094     }
1095 
1096     if (instance_node == NULL) {
1097         pcmk__rsc_trace(instance,
1098                         "%s is not a compatible instance "
1099                         "(not assigned to a node)",
1100                         instance->id);
1101         return false;
1102     }
1103 
1104     if (!pcmk__same_node(instance_node, node)) {
1105         pcmk__rsc_trace(instance,
1106                         "%s is not a compatible instance "
1107                         "(assigned to %s not %s)",
1108                         instance->id, pcmk__node_name(instance_node),
1109                         pcmk__node_name(node));
1110         return false;
1111     }
1112 
1113     return true;
1114 }
1115 
// Text describing a role in log messages ("matching" if no role was specified)
#define display_role(r) \
    (((r) != pcmk_role_unknown)? pcmk_role_text(r) : "matching")
1118 
1119 /*!
1120  * \internal
1121  * \brief Find an instance that matches a given resource by node and role
1122  *
1123  * \param[in] match_rsc  Resource that instance must match (for logging only)
1124  * \param[in] rsc        Clone or bundle resource to check for matching instance
1125  * \param[in] node       Instance must match this node
1126  * \param[in] role       If not pcmk_role_unknown, instance must match this role
1127  * \param[in] current    If true, compare instance's original node and role,
1128  *                       otherwise compare assigned next node and role
1129  *
1130  * \return \p rsc instance matching \p node and \p role if any, otherwise NULL
1131  */
1132 static pcmk_resource_t *
1133 find_compatible_instance_on_node(const pcmk_resource_t *match_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1134                                  const pcmk_resource_t *rsc,
1135                                  const pcmk_node_t *node, enum rsc_role_e role,
1136                                  bool current)
1137 {
1138     GList *instances = NULL;
1139 
1140     instances = get_instance_list(rsc);
1141     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1142         pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1143 
1144         if (pcmk__instance_matches(instance, node, role, current)) {
1145             pcmk__rsc_trace(match_rsc,
1146                             "Found %s %s instance %s compatible with %s on %s",
1147                             display_role(role), rsc->id, instance->id,
1148                             match_rsc->id, pcmk__node_name(node));
1149             free_instance_list(rsc, instances); // Only frees list, not contents
1150             return instance;
1151         }
1152     }
1153     free_instance_list(rsc, instances);
1154 
1155     pcmk__rsc_trace(match_rsc,
1156                     "No %s %s instance found compatible with %s on %s",
1157                     display_role(role), rsc->id, match_rsc->id,
1158                     pcmk__node_name(node));
1159     return NULL;
1160 }
1161 
1162 /*!
1163  * \internal
1164  * \brief Find a clone instance or bundle container compatible with a resource
1165  *
1166  * \param[in] match_rsc  Resource that instance must match
1167  * \param[in] rsc        Clone or bundle resource to check for matching instance
1168  * \param[in] role       If not pcmk_role_unknown, instance must match this role
1169  * \param[in] current    If true, compare instance's original node and role,
1170  *                       otherwise compare assigned next node and role
1171  *
1172  * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
1173  *         if any, otherwise NULL
1174  */
1175 pcmk_resource_t *
1176 pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1177                                const pcmk_resource_t *rsc, enum rsc_role_e role,
1178                                bool current)
1179 {
1180     pcmk_resource_t *instance = NULL;
1181     GList *nodes = NULL;
1182     const pcmk_node_t *node = NULL;
1183 
1184     // If match_rsc has a node, check only that node
1185     node = match_rsc->fns->location(match_rsc, NULL, current);
1186     if (node != NULL) {
1187         return find_compatible_instance_on_node(match_rsc, rsc, node, role,
1188                                                 current);
1189     }
1190 
1191     // Otherwise check for an instance matching any of match_rsc's allowed nodes
1192     nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes),
1193                              NULL);
1194     for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
1195          iter = iter->next) {
1196         instance = find_compatible_instance_on_node(match_rsc, rsc,
1197                                                     (pcmk_node_t *) iter->data,
1198                                                     role, current);
1199     }
1200 
1201     if (instance == NULL) {
1202         pcmk__rsc_debug(rsc, "No %s instance found compatible with %s",
1203                         rsc->id, match_rsc->id);
1204     }
1205     g_list_free(nodes);
1206     return instance;
1207 }
1208 
1209 /*!
1210  * \internal
1211  * \brief Unassign an instance if mandatory ordering has no interleave match
1212  *
1213  * \param[in]     first          'First' action in an ordering
1214  * \param[in]     then           'Then' action in an ordering
1215  * \param[in,out] then_instance  'Then' instance that has no interleave match
1216  * \param[in]     type           Group of enum pcmk__action_relation_flags
1217  * \param[in]     current        If true, "then" action is stopped or demoted
1218  *
1219  * \return true if \p then_instance was unassigned, otherwise false
1220  */
1221 static bool
1222 unassign_if_mandatory(const pcmk_action_t *first, const pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1223                       pcmk_resource_t *then_instance, uint32_t type,
1224                       bool current)
1225 {
1226     // Allow "then" instance to go down even without an interleave match
1227     if (current) {
1228         pcmk__rsc_trace(then->rsc,
1229                         "%s has no instance to order before stopping "
1230                         "or demoting %s",
1231                         first->rsc->id, then_instance->id);
1232 
1233     /* If the "first" action must be runnable, but there is no "first"
1234      * instance, the "then" instance must not be allowed to come up.
1235      */
1236     } else if (pcmk_any_flags_set(type, pcmk__ar_unrunnable_first_blocks
1237                                         |pcmk__ar_first_implies_then)) {
1238         pcmk__rsc_info(then->rsc,
1239                        "Inhibiting %s from being active "
1240                        "because there is no %s instance to interleave",
1241                        then_instance->id, first->rsc->id);
1242         return pcmk__assign_resource(then_instance, NULL, true, true);
1243     }
1244     return false;
1245 }
1246 
1247 /*!
1248  * \internal
1249  * \brief Find first matching action for a clone instance or bundle container
1250  *
1251  * \param[in] action       Action in an interleaved ordering
1252  * \param[in] instance     Clone instance or bundle container being interleaved
1253  * \param[in] action_name  Action to look for
1254  * \param[in] node         If not NULL, require action to be on this node
1255  * \param[in] for_first    If true, \p instance is the 'first' resource in the
1256  *                         ordering, otherwise it is the 'then' resource
1257  *
1258  * \return First action for \p instance (or in some cases if \p instance is a
1259  *         bundle container, its containerized resource) that matches
1260  *         \p action_name and \p node if any, otherwise NULL
1261  */
1262 static pcmk_action_t *
1263 find_instance_action(const pcmk_action_t *action, const pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
1264                      const char *action_name, const pcmk_node_t *node,
1265                      bool for_first)
1266 {
1267     const pcmk_resource_t *rsc = NULL;
1268     pcmk_action_t *matching_action = NULL;
1269 
1270     /* If instance is a bundle container, sometimes we should interleave the
1271      * action for the container itself, and sometimes for the containerized
1272      * resource.
1273      *
1274      * For example, given "start bundle A then bundle B", B likely requires the
1275      * service inside A's container to be active, rather than just the
1276      * container, so we should interleave the action for A's containerized
1277      * resource. On the other hand, it's possible B's container itself requires
1278      * something from A, so we should interleave the action for B's container.
1279      *
1280      * Essentially, for 'first', we should use the containerized resource for
1281      * everything except stop, and for 'then', we should use the container for
1282      * everything except promote and demote (which can only be performed on the
1283      * containerized resource).
1284      */
1285     if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP,
1286                                         PCMK_ACTION_STOPPED, NULL))
1287 
1288         || (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE,
1289                                            PCMK_ACTION_PROMOTED,
1290                                            PCMK_ACTION_DEMOTE,
1291                                            PCMK_ACTION_DEMOTED, NULL))) {
1292 
1293         rsc = pe__get_rsc_in_container(instance);
1294     }
1295     if (rsc == NULL) {
1296         rsc = instance; // No containerized resource, use instance itself
1297     } else {
1298         node = NULL; // Containerized actions are on bundle-created guest
1299     }
1300 
1301     matching_action = find_first_action(rsc->actions, NULL, action_name, node);
1302     if (matching_action != NULL) {
1303         return matching_action;
1304     }
1305 
1306     if (pcmk_is_set(instance->flags, pcmk_rsc_removed)
1307         || pcmk__str_any_of(action_name, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE,
1308                             NULL)) {
1309         crm_trace("No %s action found for %s%s",
1310                   action_name,
1311                   pcmk_is_set(instance->flags, pcmk_rsc_removed)? "orphan " : "",
1312                   instance->id);
1313     } else {
1314         crm_err("No %s action found for %s to interleave (bug?)",
1315                 action_name, instance->id);
1316     }
1317     return NULL;
1318 }
1319 
1320 /*!
1321  * \internal
1322  * \brief Get the original action name of a bundle or clone action
1323  *
1324  * Given an action for a bundle or clone, get the original action name,
1325  * mapping notify to the action being notified, and if the instances are
1326  * primitives, mapping completion actions to the action that was completed
1327  * (for example, stopped to stop).
1328  *
1329  * \param[in] action  Clone or bundle action to check
1330  *
1331  * \return Original action name for \p action
1332  */
1333 static const char *
1334 orig_action_name(const pcmk_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
1335 {
1336     // Any instance will do
1337     const pcmk_resource_t *instance = action->rsc->children->data;
1338 
1339     char *action_type = NULL;
1340     const char *action_name = action->task;
1341     enum action_tasks orig_task = pcmk_action_unspecified;
1342 
1343     if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
1344                              PCMK_ACTION_NOTIFIED, NULL)) {
1345         // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
1346         CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
1347                   return pcmk_action_text(pcmk_action_unspecified));
1348         action_name = strstr(action_type, "_notify_");
1349         CRM_CHECK(action_name != NULL,
1350                   return pcmk_action_text(pcmk_action_unspecified));
1351         action_name += strlen("_notify_");
1352     }
1353     orig_task = get_complex_task(instance, action_name);
1354     free(action_type);
1355     return pcmk_action_text(orig_task);
1356 }
1357 
1358 /*!
1359  * \internal
1360  * \brief Update two interleaved actions according to an ordering between them
1361  *
1362  * Given information about an ordering of two interleaved actions, update the
1363  * actions' flags (and runnable_before members if appropriate) as appropriate
1364  * for the ordering. Effects may cascade to other orderings involving the
1365  * actions as well.
1366  *
1367  * \param[in,out] first     'First' action in an ordering
1368  * \param[in,out] then      'Then' action in an ordering
1369  * \param[in]     node      If not NULL, limit scope of ordering to this node
1370  * \param[in]     filter    Action flags to limit scope of certain updates (may
1371  *                          include pcmk_action_optional to affect only
1372  *                          mandatory actions, and pcmk_action_runnable to
1373  *                          affect only runnable actions)
1374  * \param[in]     type      Group of enum pcmk__action_relation_flags to apply
1375  *
1376  * \return Group of enum pcmk__updated flags indicating what was updated
1377  */
1378 static uint32_t
1379 update_interleaved_actions(pcmk_action_t *first, pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1380                            const pcmk_node_t *node, uint32_t filter,
1381                            uint32_t type)
1382 {
1383     GList *instances = NULL;
1384     uint32_t changed = pcmk__updated_none;
1385     const char *orig_first_task = orig_action_name(first);
1386 
1387     // Stops and demotes must be interleaved with instance on current node
1388     bool current = pcmk__ends_with(first->uuid, "_" PCMK_ACTION_STOPPED "_0")
1389                    || pcmk__ends_with(first->uuid,
1390                                       "_" PCMK_ACTION_DEMOTED "_0");
1391 
1392     // Update the specified actions for each "then" instance individually
1393     instances = get_instance_list(then->rsc);
1394     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1395         pcmk_resource_t *first_instance = NULL;
1396         pcmk_resource_t *then_instance = iter->data;
1397 
1398         pcmk_action_t *first_action = NULL;
1399         pcmk_action_t *then_action = NULL;
1400 
1401         // Find a "first" instance to interleave with this "then" instance
1402         first_instance = pcmk__find_compatible_instance(then_instance,
1403                                                         first->rsc,
1404                                                         pcmk_role_unknown,
1405                                                         current);
1406 
1407         if (first_instance == NULL) { // No instance can be interleaved
1408             if (unassign_if_mandatory(first, then, then_instance, type,
1409                                       current)) {
1410                 pcmk__set_updated_flags(changed, first, pcmk__updated_then);
1411             }
1412             continue;
1413         }
1414 
1415         first_action = find_instance_action(first, first_instance,
1416                                             orig_first_task, node, true);
1417         if (first_action == NULL) {
1418             continue;
1419         }
1420 
1421         then_action = find_instance_action(then, then_instance, then->task,
1422                                            node, false);
1423         if (then_action == NULL) {
1424             continue;
1425         }
1426 
1427         if (order_actions(first_action, then_action, type)) {
1428             pcmk__set_updated_flags(changed, first,
1429                                     pcmk__updated_first|pcmk__updated_then);
1430         }
1431 
1432         changed |= then_instance->cmds->update_ordered_actions(
1433             first_action, then_action, node,
1434             first_instance->cmds->action_flags(first_action, node), filter,
1435             type, then->rsc->cluster);
1436     }
1437     free_instance_list(then->rsc, instances);
1438     return changed;
1439 }
1440 
1441 /*!
1442  * \internal
1443  * \brief Check whether two actions in an ordering can be interleaved
1444  *
1445  * \param[in] first  'First' action in the ordering
1446  * \param[in] then   'Then' action in the ordering
1447  *
1448  * \return true if \p first and \p then can be interleaved, otherwise false
1449  */
1450 static bool
1451 can_interleave_actions(const pcmk_action_t *first, const pcmk_action_t *then)
     /* [previous][next][first][last][top][bottom][index][help] */
1452 {
1453     bool interleave = false;
1454     pcmk_resource_t *rsc = NULL;
1455 
1456     if ((first->rsc == NULL) || (then->rsc == NULL)) {
1457         crm_trace("Not interleaving %s with %s: not resource actions",
1458                   first->uuid, then->uuid);
1459         return false;
1460     }
1461 
1462     if (first->rsc == then->rsc) {
1463         crm_trace("Not interleaving %s with %s: same resource",
1464                   first->uuid, then->uuid);
1465         return false;
1466     }
1467 
1468     if ((first->rsc->variant < pcmk_rsc_variant_clone)
1469         || (then->rsc->variant < pcmk_rsc_variant_clone)) {
1470         crm_trace("Not interleaving %s with %s: not clones or bundles",
1471                   first->uuid, then->uuid);
1472         return false;
1473     }
1474 
1475     if (pcmk__ends_with(then->uuid, "_stop_0")
1476         || pcmk__ends_with(then->uuid, "_demote_0")) {
1477         rsc = first->rsc;
1478     } else {
1479         rsc = then->rsc;
1480     }
1481 
1482     interleave = crm_is_true(g_hash_table_lookup(rsc->meta,
1483                                                  PCMK_META_INTERLEAVE));
1484     pcmk__rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
1485                     first->uuid, then->uuid, (interleave? "" : "not "),
1486                     rsc->id);
1487     return interleave;
1488 }
1489 
1490 /*!
1491  * \internal
1492  * \brief Update non-interleaved instance actions according to an ordering
1493  *
1494  * Given information about an ordering of two non-interleaved actions, update
1495  * the actions' flags (and runnable_before members if appropriate) as
1496  * appropriate for the ordering. Effects may cascade to other orderings
1497  * involving the actions as well.
1498  *
1499  * \param[in,out] instance  Clone instance or bundle container
1500  * \param[in,out] first     "First" action in ordering
1501  * \param[in]     then      "Then" action in ordering (for \p instance's parent)
1502  * \param[in]     node      If not NULL, limit scope of ordering to this node
1503  * \param[in]     flags     Action flags for \p first for ordering purposes
1504  * \param[in]     filter    Action flags to limit scope of certain updates (may
1505  *                          include pcmk_action_optional to affect only
1506  *                          mandatory actions, and pcmk_action_runnable to
1507  *                          affect only runnable actions)
1508  * \param[in]     type      Group of enum pcmk__action_relation_flags to apply
1509  *
1510  * \return Group of enum pcmk__updated flags indicating what was updated
1511  */
1512 static uint32_t
1513 update_noninterleaved_actions(pcmk_resource_t *instance, pcmk_action_t *first,
     /* [previous][next][first][last][top][bottom][index][help] */
1514                               const pcmk_action_t *then, const pcmk_node_t *node,
1515                               uint32_t flags, uint32_t filter, uint32_t type)
1516 {
1517     pcmk_action_t *instance_action = NULL;
1518     uint32_t instance_flags = 0;
1519     uint32_t changed = pcmk__updated_none;
1520 
1521     // Check whether instance has an equivalent of "then" action
1522     instance_action = find_first_action(instance->actions, NULL, then->task,
1523                                         node);
1524     if (instance_action == NULL) {
1525         return changed;
1526     }
1527 
1528     // Check whether action is runnable
1529     instance_flags = instance->cmds->action_flags(instance_action, node);
1530     if (!pcmk_is_set(instance_flags, pcmk_action_runnable)) {
1531         return changed;
1532     }
1533 
1534     // If so, update actions for the instance
1535     changed = instance->cmds->update_ordered_actions(first, instance_action,
1536                                                      node, flags, filter, type,
1537                                                      instance->cluster);
1538 
1539     // Propagate any changes to later actions
1540     if (pcmk_is_set(changed, pcmk__updated_then)) {
1541         for (GList *after_iter = instance_action->actions_after;
1542              after_iter != NULL; after_iter = after_iter->next) {
1543             pcmk__related_action_t *after = after_iter->data;
1544 
1545             pcmk__update_action_for_orderings(after->action, instance->cluster);
1546         }
1547     }
1548 
1549     return changed;
1550 }
1551 
1552 /*!
1553  * \internal
1554  * \brief Update two actions according to an ordering between them
1555  *
1556  * Given information about an ordering of two clone or bundle actions, update
1557  * the actions' flags (and runnable_before members if appropriate) as
1558  * appropriate for the ordering. Effects may cascade to other orderings
1559  * involving the actions as well.
1560  *
1561  * \param[in,out] first      'First' action in an ordering
1562  * \param[in,out] then       'Then' action in an ordering
1563  * \param[in]     node       If not NULL, limit scope of ordering to this node
1564  *                           (only used when interleaving instances)
1565  * \param[in]     flags      Action flags for \p first for ordering purposes
1566  * \param[in]     filter     Action flags to limit scope of certain updates (may
1567  *                           include pcmk_action_optional to affect only
1568  *                           mandatory actions, and pcmk_action_runnable to
1569  *                           affect only runnable actions)
1570  * \param[in]     type       Group of enum pcmk__action_relation_flags to apply
1571  * \param[in,out] scheduler  Scheduler data
1572  *
1573  * \return Group of enum pcmk__updated flags indicating what was updated
1574  */
1575 uint32_t
1576 pcmk__instance_update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1577                                       const pcmk_node_t *node, uint32_t flags,
1578                                       uint32_t filter, uint32_t type,
1579                                       pcmk_scheduler_t *scheduler)
1580 {
1581     pcmk__assert((first != NULL) && (then != NULL) && (scheduler != NULL));
1582 
1583     if (then->rsc == NULL) {
1584         return pcmk__updated_none;
1585 
1586     } else if (can_interleave_actions(first, then)) {
1587         return update_interleaved_actions(first, then, node, filter, type);
1588 
1589     } else {
1590         uint32_t changed = pcmk__updated_none;
1591         GList *instances = get_instance_list(then->rsc);
1592 
1593         // Update actions for the clone or bundle resource itself
1594         changed |= pcmk__update_ordered_actions(first, then, node, flags,
1595                                                 filter, type, scheduler);
1596 
1597         // Update the 'then' clone instances or bundle containers individually
1598         for (GList *iter = instances; iter != NULL; iter = iter->next) {
1599             pcmk_resource_t *instance = iter->data;
1600 
1601             changed |= update_noninterleaved_actions(instance, first, then,
1602                                                      node, flags, filter, type);
1603         }
1604         free_instance_list(then->rsc, instances);
1605         return changed;
1606     }
1607 }
1608 
/*!
 * \internal
 * \brief Clear a flag in a local summary of an action's flags
 *
 * Assign to \p flags the value returned by pcmk__clear_flags_as() when given
 * the current value of \p flags and \p flag, using "Action summary" as the
 * flag-group label and the ID of \p action's resource as the target name.
 *
 * \param[in,out] flags   Modifiable lvalue holding the flag group to update
 * \param[in]     action  Action being summarized (its rsc member is
 *                        dereferenced, so it must belong to a resource)
 * \param[in]     flag    Flag(s) to clear (also stringified for the call)
 *
 * \note Each argument is parenthesized in the expansion so that arbitrary
 *       expressions may be passed safely; \p flags is evaluated twice.
 */
#define pe__clear_action_summary_flags(flags, action, flag) do {            \
        (flags) = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,       \
                                       "Action summary",                    \
                                       (action)->rsc->id, (flags), (flag),  \
                                       #flag);                              \
    } while (0)
1614 
1615 /*!
1616  * \internal
1617  * \brief Return action flags for a given clone or bundle action
1618  *
1619  * \param[in,out] action     Action for a clone or bundle
1620  * \param[in]     instances  Clone instances or bundle containers
1621  * \param[in]     node       If not NULL, limit effects to this node
1622  *
1623  * \return Flags appropriate to \p action on \p node
1624  */
1625 uint32_t
1626 pcmk__collective_action_flags(pcmk_action_t *action, const GList *instances,
     /* [previous][next][first][last][top][bottom][index][help] */
1627                               const pcmk_node_t *node)
1628 {
1629     bool any_runnable = false;
1630     const char *action_name = orig_action_name(action);
1631 
1632     // Set original assumptions (optional and runnable may be cleared below)
1633     uint32_t flags = pcmk_action_optional
1634                      |pcmk_action_runnable
1635                      |pcmk_action_pseudo;
1636 
1637     for (const GList *iter = instances; iter != NULL; iter = iter->next) {
1638         const pcmk_resource_t *instance = iter->data;
1639         const pcmk_node_t *instance_node = NULL;
1640         pcmk_action_t *instance_action = NULL;
1641         uint32_t instance_flags;
1642 
1643         // Node is relevant only to primitive instances
1644         if (pcmk__is_primitive(instance)) {
1645             instance_node = node;
1646         }
1647 
1648         instance_action = find_first_action(instance->actions, NULL,
1649                                             action_name, instance_node);
1650         if (instance_action == NULL) {
1651             pcmk__rsc_trace(action->rsc, "%s has no %s action on %s",
1652                             instance->id, action_name, pcmk__node_name(node));
1653             continue;
1654         }
1655 
1656         pcmk__rsc_trace(action->rsc, "%s has %s for %s on %s",
1657                         instance->id, instance_action->uuid, action_name,
1658                         pcmk__node_name(node));
1659 
1660         instance_flags = instance->cmds->action_flags(instance_action, node);
1661 
1662         // If any instance action is mandatory, so is the collective action
1663         if (pcmk_is_set(flags, pcmk_action_optional)
1664             && !pcmk_is_set(instance_flags, pcmk_action_optional)) {
1665             pcmk__rsc_trace(instance, "%s is mandatory because %s is",
1666                             action->uuid, instance_action->uuid);
1667             pe__clear_action_summary_flags(flags, action,
1668                                            pcmk_action_optional);
1669             pcmk__clear_action_flags(action, pcmk_action_optional);
1670         }
1671 
1672         // If any instance action is runnable, so is the collective action
1673         if (pcmk_is_set(instance_flags, pcmk_action_runnable)) {
1674             any_runnable = true;
1675         }
1676     }
1677 
1678     if (!any_runnable) {
1679         pcmk__rsc_trace(action->rsc,
1680                         "%s is not runnable because no instance can run %s",
1681                         action->uuid, action_name);
1682         pe__clear_action_summary_flags(flags, action, pcmk_action_runnable);
1683         if (node == NULL) {
1684             pcmk__clear_action_flags(action, pcmk_action_runnable);
1685         }
1686     }
1687 
1688     return flags;
1689 }

/* [previous][next][first][last][top][bottom][index][help] */