root/lib/pacemaker/pcmk_sched_instances.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. can_run_instance
  2. ban_unavailable_allowed_nodes
  3. new_node_table
  4. apply_parent_colocations
  5. cmp_instance_by_colocation
  6. did_fail
  7. node_is_allowed
  8. pcmk__cmp_instance_number
  9. pcmk__cmp_instance
  10. increment_parent_count
  11. assign_instance
  12. assign_instance_early
  13. reset_allowed_node_counts
  14. preferred_node
  15. pcmk__assign_instances
  16. check_instance_state
  17. pcmk__create_instance_actions
  18. get_instance_list
  19. free_instance_list
  20. pcmk__instance_matches
  21. find_compatible_instance_on_node
  22. pcmk__find_compatible_instance
  23. unassign_if_mandatory
  24. find_instance_action
  25. orig_action_name
  26. update_interleaved_actions
  27. can_interleave_actions
  28. update_noninterleaved_actions
  29. pcmk__instance_update_ordered_actions
  30. pcmk__collective_action_flags

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 /* This file is intended for code usable with both clone instances and bundle
  11  * replica containers.
  12  */
  13 
  14 #include <crm_internal.h>
  15 #include <crm/common/xml.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Check whether a node is allowed to run an instance
  22  *
  23  * \param[in] instance      Clone instance or bundle container to check
  24  * \param[in] node          Node to check
  25  * \param[in] max_per_node  Maximum number of instances allowed to run on a node
  26  *
  27  * \return true if \p node is allowed to run \p instance, otherwise false
  28  */
  29 static bool
  30 can_run_instance(const pcmk_resource_t *instance, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
  31                  int max_per_node)
  32 {
  33     pcmk_node_t *allowed_node = NULL;
  34 
  35     if (pcmk_is_set(instance->flags, pcmk__rsc_removed)) {
  36         pcmk__rsc_trace(instance, "%s cannot run on %s: orphaned",
  37                         instance->id, pcmk__node_name(node));
  38         return false;
  39     }
  40 
  41     if (!pcmk__node_available(node, false, false)) {
  42         pcmk__rsc_trace(instance,
  43                         "%s cannot run on %s: node cannot run resources",
  44                         instance->id, pcmk__node_name(node));
  45         return false;
  46     }
  47 
  48     allowed_node = pcmk__top_allowed_node(instance, node);
  49     if (allowed_node == NULL) {
  50         crm_warn("%s cannot run on %s: node not allowed",
  51                  instance->id, pcmk__node_name(node));
  52         return false;
  53     }
  54 
  55     if (allowed_node->assign->score < 0) {
  56         pcmk__rsc_trace(instance,
  57                         "%s cannot run on %s: parent score is %s there",
  58                         instance->id, pcmk__node_name(node),
  59                         pcmk_readable_score(allowed_node->assign->score));
  60         return false;
  61     }
  62 
  63     if (allowed_node->assign->count >= max_per_node) {
  64         pcmk__rsc_trace(instance,
  65                         "%s cannot run on %s: node already has %d instance%s",
  66                         instance->id, pcmk__node_name(node), max_per_node,
  67                         pcmk__plural_s(max_per_node));
  68         return false;
  69     }
  70 
  71     pcmk__rsc_trace(instance, "%s can run on %s (%d already running)",
  72                     instance->id, pcmk__node_name(node),
  73                     allowed_node->assign->count);
  74     return true;
  75 }
  76 
  77 /*!
  78  * \internal
  79  * \brief Ban a clone instance or bundle replica from unavailable allowed nodes
  80  *
  81  * \param[in,out] instance      Clone instance or bundle replica to ban
  82  * \param[in]     max_per_node  Maximum instances allowed to run on a node
  83  */
  84 static void
  85 ban_unavailable_allowed_nodes(pcmk_resource_t *instance, int max_per_node)
     /* [previous][next][first][last][top][bottom][index][help] */
  86 {
  87     if (instance->priv->allowed_nodes != NULL) {
  88         GHashTableIter iter;
  89         pcmk_node_t *node = NULL;
  90 
  91         g_hash_table_iter_init(&iter, instance->priv->allowed_nodes);
  92         while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
  93             if (!can_run_instance(instance, node, max_per_node)) {
  94                 pcmk__rsc_trace(instance, "Banning %s from unavailable node %s",
  95                                 instance->id, pcmk__node_name(node));
  96                 node->assign->score = -PCMK_SCORE_INFINITY;
  97 
  98                 for (GList *child_iter = instance->priv->children;
  99                      child_iter != NULL; child_iter = child_iter->next) {
 100 
 101                     pcmk_resource_t *child = child_iter->data;
 102                     pcmk_node_t *child_node = NULL;
 103 
 104                     child_node =
 105                         g_hash_table_lookup(child->priv->allowed_nodes,
 106                                             node->priv->id);
 107                     if (child_node != NULL) {
 108                         pcmk__rsc_trace(instance,
 109                                         "Banning %s child %s "
 110                                         "from unavailable node %s",
 111                                         instance->id, child->id,
 112                                         pcmk__node_name(node));
 113                         child_node->assign->score = -PCMK_SCORE_INFINITY;
 114                     }
 115                 }
 116             }
 117         }
 118     }
 119 }
 120 
 121 /*!
 122  * \internal
 123  * \brief Create a hash table with a single node in it
 124  *
 125  * \param[in] node  Node to copy into new table
 126  *
 127  * \return Newly created hash table containing a copy of \p node
 128  * \note The caller is responsible for freeing the result with
 129  *       g_hash_table_destroy().
 130  */
 131 static GHashTable *
 132 new_node_table(pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 133 {
 134     GHashTable *table = pcmk__strkey_table(NULL, pcmk__free_node_copy);
 135 
 136     node = pe__copy_node(node);
 137     g_hash_table_insert(table, (gpointer) node->priv->id, node);
 138     return table;
 139 }
 140 
 141 /*!
 142  * \internal
 143  * \brief Apply a resource's parent's colocation scores to a node table
 144  *
 145  * \param[in]     rsc    Resource whose colocations should be applied
 146  * \param[in,out] nodes  Node table to apply colocations to
 147  */
 148 static void
 149 apply_parent_colocations(const pcmk_resource_t *rsc, GHashTable **nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
 150 {
 151     GList *colocations = pcmk__this_with_colocations(rsc);
 152 
 153     for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
 154         const pcmk__colocation_t *colocation = iter->data;
 155         pcmk_resource_t *other = colocation->primary;
 156         float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
 157 
 158         other->priv->cmds->add_colocated_node_scores(other, rsc, rsc->id,
 159                                                      nodes, colocation, factor,
 160                                                      pcmk__coloc_select_default);
 161     }
 162     g_list_free(colocations);
 163     colocations = pcmk__with_this_colocations(rsc);
 164 
 165     for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
 166         const pcmk__colocation_t *colocation = iter->data;
 167         pcmk_resource_t *other = colocation->dependent;
 168         float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
 169 
 170         if (!pcmk__colocation_has_influence(colocation, rsc)) {
 171             continue;
 172         }
 173         other->priv->cmds->add_colocated_node_scores(other, rsc, rsc->id,
 174                                                      nodes, colocation, factor,
 175                                                      pcmk__coloc_select_nonnegative);
 176     }
 177     g_list_free(colocations);
 178 }
 179 
 180 /*!
 181  * \internal
 182  * \brief Compare clone or bundle instances based on colocation scores
 183  *
 184  * Determine the relative order in which two clone or bundle instances should be
 185  * assigned to nodes, considering the scores of colocation constraints directly
 186  * or indirectly involving them.
 187  *
 188  * \param[in] instance1  First instance to compare
 189  * \param[in] instance2  Second instance to compare
 190  *
 191  * \return A negative number if \p instance1 should be assigned first,
 192  *         a positive number if \p instance2 should be assigned first,
 193  *         or 0 if assignment order doesn't matter
 194  */
 195 static int
 196 cmp_instance_by_colocation(const pcmk_resource_t *instance1,
     /* [previous][next][first][last][top][bottom][index][help] */
 197                            const pcmk_resource_t *instance2)
 198 {
 199     int rc = 0;
 200     pcmk_node_t *node1 = NULL;
 201     pcmk_node_t *node2 = NULL;
 202     pcmk_node_t *current_node1 = pcmk__current_node(instance1);
 203     pcmk_node_t *current_node2 = pcmk__current_node(instance2);
 204     GHashTable *colocated_scores1 = NULL;
 205     GHashTable *colocated_scores2 = NULL;
 206 
 207     pcmk__assert((instance1 != NULL) && (instance1->priv->parent != NULL)
 208                  && (instance2 != NULL) && (instance2->priv->parent != NULL)
 209                  && (current_node1 != NULL) && (current_node2 != NULL));
 210 
 211     // Create node tables initialized with each node
 212     colocated_scores1 = new_node_table(current_node1);
 213     colocated_scores2 = new_node_table(current_node2);
 214 
 215     // Apply parental colocations
 216     apply_parent_colocations(instance1, &colocated_scores1);
 217     apply_parent_colocations(instance2, &colocated_scores2);
 218 
 219     // Find original nodes again, with scores updated for colocations
 220     node1 = g_hash_table_lookup(colocated_scores1, current_node1->priv->id);
 221     node2 = g_hash_table_lookup(colocated_scores2, current_node2->priv->id);
 222 
 223     // Compare nodes by updated scores
 224     if (node1->assign->score < node2->assign->score) {
 225         crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
 226                   instance1->id, node1->assign->score, pcmk__node_name(node1),
 227                   instance2->id, node2->assign->score, pcmk__node_name(node2));
 228         rc = 1;
 229 
 230     } else if (node1->assign->score > node2->assign->score) {
 231         crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
 232                   instance1->id, node1->assign->score, pcmk__node_name(node1),
 233                   instance2->id, node2->assign->score, pcmk__node_name(node2));
 234         rc = -1;
 235     }
 236 
 237     g_hash_table_destroy(colocated_scores1);
 238     g_hash_table_destroy(colocated_scores2);
 239     return rc;
 240 }
 241 
 242 /*!
 243  * \internal
 244  * \brief Check whether a resource or any of its children are failed
 245  *
 246  * \param[in] rsc  Resource to check
 247  *
 248  * \return true if \p rsc or any of its children are failed, otherwise false
 249  */
 250 static bool
 251 did_fail(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 252 {
 253     if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
 254         return true;
 255     }
 256 
 257     for (GList *iter = rsc->priv->children;
 258          iter != NULL; iter = iter->next) {
 259 
 260         if (did_fail((const pcmk_resource_t *) iter->data)) {
 261             return true;
 262         }
 263     }
 264     return false;
 265 }
 266 
 267 /*!
 268  * \internal
 269  * \brief Check whether a node is allowed to run a resource
 270  *
 271  * \param[in]     rsc   Resource to check
 272  * \param[in,out] node  Node to check (will be set NULL if not allowed)
 273  *
 274  * \return true if *node is either NULL or allowed for \p rsc, otherwise false
 275  */
 276 static bool
 277 node_is_allowed(const pcmk_resource_t *rsc, pcmk_node_t **node)
     /* [previous][next][first][last][top][bottom][index][help] */
 278 {
 279     if (*node != NULL) {
 280         pcmk_node_t *allowed = g_hash_table_lookup(rsc->priv->allowed_nodes,
 281                                                    (*node)->priv->id);
 282 
 283         if ((allowed == NULL) || (allowed->assign->score < 0)) {
 284             pcmk__rsc_trace(rsc, "%s: current location (%s) is unavailable",
 285                             rsc->id, pcmk__node_name(*node));
 286             *node = NULL;
 287             return false;
 288         }
 289     }
 290     return true;
 291 }
 292 
 293 /*!
 294  * \internal
 295  * \brief Compare two clone or bundle instances' instance numbers
 296  *
 297  * \param[in] a  First instance to compare
 298  * \param[in] b  Second instance to compare
 299  *
 300  * \return A negative number if \p a's instance number is lower,
 301  *         a positive number if \p b's instance number is lower,
 302  *         or 0 if their instance numbers are the same
 303  */
 304 gint
 305 pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307     const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
 308     const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
 309     char *div1 = NULL;
 310     char *div2 = NULL;
 311 
 312     pcmk__assert((instance1 != NULL) && (instance2 != NULL));
 313 
 314     // Clone numbers are after a colon, bundle numbers after a dash
 315     div1 = strrchr(instance1->id, ':');
 316     if (div1 == NULL) {
 317         div1 = strrchr(instance1->id, '-');
 318     }
 319     div2 = strrchr(instance2->id, ':');
 320     if (div2 == NULL) {
 321         div2 = strrchr(instance2->id, '-');
 322     }
 323     pcmk__assert((div1 != NULL) && (div2 != NULL));
 324 
 325     return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
 326 }
 327 
 328 /*!
 329  * \internal
 330  * \brief Compare clone or bundle instances according to assignment order
 331  *
 332  * Compare two clone or bundle instances according to the order they should be
 333  * assigned to nodes, preferring (in order):
 334  *
 335  *  - Active instance that is less multiply active
 336  *  - Instance that is not active on a disallowed node
 337  *  - Instance with higher configured priority
 338  *  - Active instance whose current node can run resources
 339  *  - Active instance whose parent is allowed on current node
 340  *  - Active instance whose current node has fewer other instances
 341  *  - Active instance
 342  *  - Instance that isn't failed
 343  *  - Instance whose colocations result in higher score on current node
 344  *  - Instance with lower ID in lexicographic order
 345  *
 346  * \param[in] a          First instance to compare
 347  * \param[in] b          Second instance to compare
 348  *
 349  * \return A negative number if \p a should be assigned first,
 350  *         a positive number if \p b should be assigned first,
 351  *         or 0 if assignment order doesn't matter
 352  */
 353 gint
 354 pcmk__cmp_instance(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 355 {
 356     int rc = 0;
 357     pcmk_node_t *node1 = NULL;
 358     pcmk_node_t *node2 = NULL;
 359     unsigned int nnodes1 = 0;
 360     unsigned int nnodes2 = 0;
 361 
 362     bool can1 = true;
 363     bool can2 = true;
 364 
 365     const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
 366     const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
 367 
 368     pcmk__assert((instance1 != NULL) && (instance2 != NULL));
 369 
 370     node1 = instance1->priv->fns->active_node(instance1, &nnodes1, NULL);
 371     node2 = instance2->priv->fns->active_node(instance2, &nnodes2, NULL);
 372 
 373     /* If both instances are running and at least one is multiply
 374      * active, prefer instance that's running on fewer nodes.
 375      */
 376     if ((nnodes1 > 0) && (nnodes2 > 0)) {
 377         if (nnodes1 < nnodes2) {
 378             crm_trace("Assign %s (active on %d) before %s (active on %d): "
 379                       "less multiply active",
 380                       instance1->id, nnodes1, instance2->id, nnodes2);
 381             return -1;
 382 
 383         } else if (nnodes1 > nnodes2) {
 384             crm_trace("Assign %s (active on %d) after %s (active on %d): "
 385                       "more multiply active",
 386                       instance1->id, nnodes1, instance2->id, nnodes2);
 387             return 1;
 388         }
 389     }
 390 
 391     /* An instance that is either inactive or active on an allowed node is
 392      * preferred over an instance that is active on a no-longer-allowed node.
 393      */
 394     can1 = node_is_allowed(instance1, &node1);
 395     can2 = node_is_allowed(instance2, &node2);
 396     if (can1 && !can2) {
 397         crm_trace("Assign %s before %s: not active on a disallowed node",
 398                   instance1->id, instance2->id);
 399         return -1;
 400 
 401     } else if (!can1 && can2) {
 402         crm_trace("Assign %s after %s: active on a disallowed node",
 403                   instance1->id, instance2->id);
 404         return 1;
 405     }
 406 
 407     // Prefer instance with higher configured priority
 408     if (instance1->priv->priority > instance2->priv->priority) {
 409         crm_trace("Assign %s before %s: priority (%d > %d)",
 410                   instance1->id, instance2->id,
 411                   instance1->priv->priority, instance2->priv->priority);
 412         return -1;
 413 
 414     } else if (instance1->priv->priority < instance2->priv->priority) {
 415         crm_trace("Assign %s after %s: priority (%d < %d)",
 416                   instance1->id, instance2->id,
 417                   instance1->priv->priority, instance2->priv->priority);
 418         return 1;
 419     }
 420 
 421     // Prefer active instance
 422     if ((node1 == NULL) && (node2 == NULL)) {
 423         crm_trace("No assignment preference for %s vs. %s: inactive",
 424                   instance1->id, instance2->id);
 425         return 0;
 426 
 427     } else if (node1 == NULL) {
 428         crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
 429         return 1;
 430 
 431     } else if (node2 == NULL) {
 432         crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
 433         return -1;
 434     }
 435 
 436     // Prefer instance whose current node can run resources
 437     can1 = pcmk__node_available(node1, false, false);
 438     can2 = pcmk__node_available(node2, false, false);
 439     if (can1 && !can2) {
 440         crm_trace("Assign %s before %s: current node can run resources",
 441                   instance1->id, instance2->id);
 442         return -1;
 443 
 444     } else if (!can1 && can2) {
 445         crm_trace("Assign %s after %s: current node can't run resources",
 446                   instance1->id, instance2->id);
 447         return 1;
 448     }
 449 
 450     // Prefer instance whose parent is allowed to run on instance's current node
 451     node1 = pcmk__top_allowed_node(instance1, node1);
 452     node2 = pcmk__top_allowed_node(instance2, node2);
 453     if ((node1 == NULL) && (node2 == NULL)) {
 454         crm_trace("No assignment preference for %s vs. %s: "
 455                   "parent not allowed on either instance's current node",
 456                   instance1->id, instance2->id);
 457         return 0;
 458 
 459     } else if (node1 == NULL) {
 460         crm_trace("Assign %s after %s: parent not allowed on current node",
 461                   instance1->id, instance2->id);
 462         return 1;
 463 
 464     } else if (node2 == NULL) {
 465         crm_trace("Assign %s before %s: parent allowed on current node",
 466                   instance1->id, instance2->id);
 467         return -1;
 468     }
 469 
 470     // Prefer instance whose current node is running fewer other instances
 471     if (node1->assign->count < node2->assign->count) {
 472         crm_trace("Assign %s before %s: fewer active instances on current node",
 473                   instance1->id, instance2->id);
 474         return -1;
 475 
 476     } else if (node1->assign->count > node2->assign->count) {
 477         crm_trace("Assign %s after %s: more active instances on current node",
 478                   instance1->id, instance2->id);
 479         return 1;
 480     }
 481 
 482     // Prefer instance that isn't failed
 483     can1 = did_fail(instance1);
 484     can2 = did_fail(instance2);
 485     if (!can1 && can2) {
 486         crm_trace("Assign %s before %s: not failed",
 487                   instance1->id, instance2->id);
 488         return -1;
 489     } else if (can1 && !can2) {
 490         crm_trace("Assign %s after %s: failed",
 491                   instance1->id, instance2->id);
 492         return 1;
 493     }
 494 
 495     // Prefer instance with higher cumulative colocation score on current node
 496     rc = cmp_instance_by_colocation(instance1, instance2);
 497     if (rc != 0) {
 498         return rc;
 499     }
 500 
 501     // Prefer instance with lower instance number
 502     rc = pcmk__cmp_instance_number(instance1, instance2);
 503     if (rc < 0) {
 504         crm_trace("Assign %s before %s: instance number",
 505                   instance1->id, instance2->id);
 506     } else if (rc > 0) {
 507         crm_trace("Assign %s after %s: instance number",
 508                   instance1->id, instance2->id);
 509     } else {
 510         crm_trace("No assignment preference for %s vs. %s",
 511                   instance1->id, instance2->id);
 512     }
 513     return rc;
 514 }
 515 
 516 /*!
 517  * \internal
 518  * \brief Increment the parent's instance count after assigning an instance
 519  *
 520  * An instance's parent tracks how many instances have been assigned to each
 521  * node via its pcmk_node_t:count member. After assigning an instance to a node,
 522  * find the corresponding node in the parent's allowed table and increment it.
 523  *
 524  * \param[in,out] instance     Instance whose parent to update
 525  * \param[in]     assigned_to  Node to which the instance was assigned
 526  */
 527 static void
 528 increment_parent_count(pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
 529                        const pcmk_node_t *assigned_to)
 530 {
 531     pcmk_node_t *allowed = NULL;
 532 
 533     if (assigned_to == NULL) {
 534         return;
 535     }
 536     allowed = pcmk__top_allowed_node(instance, assigned_to);
 537 
 538     if (allowed == NULL) {
 539         /* The instance is allowed on the node, but its parent isn't. This
 540          * shouldn't be possible if the resource is managed, and we won't be
 541          * able to limit the number of instances assigned to the node.
 542          */
 543         CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pcmk__rsc_managed));
 544 
 545     } else {
 546         allowed->assign->count++;
 547     }
 548 }
 549 
 550 /*!
 551  * \internal
 552  * \brief Assign an instance to a node
 553  *
 554  * \param[in,out] instance      Clone instance or bundle replica container
 555  * \param[in]     prefer        If not NULL, attempt early assignment to this
 556  *                              node, if still the best choice; otherwise,
 557  *                              perform final assignment
 558  * \param[in]     max_per_node  Assign at most this many instances to one node
 559  *
 560  * \return Node to which \p instance is assigned
 561  */
 562 static const pcmk_node_t *
 563 assign_instance(pcmk_resource_t *instance, const pcmk_node_t *prefer,
     /* [previous][next][first][last][top][bottom][index][help] */
 564                 int max_per_node)
 565 {
 566     pcmk_node_t *chosen = NULL;
 567 
 568     pcmk__rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
 569                     ((prefer == NULL)? "no node" : prefer->priv->name));
 570 
 571     if (pcmk_is_set(instance->flags, pcmk__rsc_assigning)) {
 572         pcmk__rsc_debug(instance,
 573                         "Assignment loop detected involving %s colocations",
 574                         instance->id);
 575         return NULL;
 576     }
 577     ban_unavailable_allowed_nodes(instance, max_per_node);
 578 
 579     // Failed early assignments are reversible (stop_if_fail=false)
 580     chosen = instance->priv->cmds->assign(instance, prefer, (prefer == NULL));
 581     increment_parent_count(instance, chosen);
 582     return chosen;
 583 }
 584 
 585 /*!
 586  * \internal
 587  * \brief Try to assign an instance to its current node early
 588  *
 589  * \param[in] rsc           Clone or bundle being assigned (for logs only)
 590  * \param[in] instance      Clone instance or bundle replica container
 591  * \param[in] current       Instance's current node
 592  * \param[in] max_per_node  Maximum number of instances per node
 593  * \param[in] available     Number of instances still available for assignment
 594  *
 595  * \return \c true if \p instance was successfully assigned to its current node,
 596  *         or \c false otherwise
 597  */
 598 static bool
 599 assign_instance_early(const pcmk_resource_t *rsc, pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
 600                       const pcmk_node_t *current, int max_per_node,
 601                       int available)
 602 {
 603     const pcmk_node_t *chosen = NULL;
 604     int reserved = 0;
 605 
 606     pcmk_resource_t *parent = instance->priv->parent;
 607     GHashTable *allowed_orig = NULL;
 608     GHashTable *allowed_orig_parent = parent->priv->allowed_nodes;
 609     const pcmk_node_t *allowed_node = NULL;
 610 
 611     pcmk__rsc_trace(instance, "Trying to assign %s to its current node %s",
 612                     instance->id, pcmk__node_name(current));
 613 
 614     allowed_node = g_hash_table_lookup(instance->priv->allowed_nodes,
 615                                        current->priv->id);
 616     if (!pcmk__node_available(allowed_node, true, false)) {
 617         pcmk__rsc_info(instance,
 618                        "Not assigning %s to current node %s: unavailable",
 619                        instance->id, pcmk__node_name(current));
 620         return false;
 621     }
 622 
 623     /* On each iteration, if instance gets assigned to a node other than its
 624      * current one, we reserve one instance for the chosen node, unassign
 625      * instance, restore instance's original node tables, and try again. This
 626      * way, instances are proportionally assigned to nodes based on preferences,
 627      * but shuffling of specific instances is minimized. If a node will be
 628      * assigned instances at all, it preferentially receives instances that are
 629      * currently active there.
 630      *
 631      * parent->private->allowed_nodes tracks the number of instances assigned to
 632      * each node. If a node already has max_per_node instances assigned,
 633      * ban_unavailable_allowed_nodes() marks it as unavailable.
 634      *
 635      * In the end, we restore the original parent->private->allowed_nodes to
 636      * undo the changes to counts during tentative assignments. If we
 637      * successfully assigned an instance to its current node, we increment that
 638      * node's counter.
 639      */
 640 
 641     // Back up the allowed node tables of instance and its children recursively
 642     pcmk__copy_node_tables(instance, &allowed_orig);
 643 
 644     // Update instances-per-node counts in a scratch table
 645     parent->priv->allowed_nodes = pcmk__copy_node_table(allowed_orig_parent);
 646 
 647     while (reserved < available) {
 648         chosen = assign_instance(instance, current, max_per_node);
 649 
 650         if (pcmk__same_node(chosen, current)) {
 651             // Successfully assigned to current node
 652             break;
 653         }
 654 
 655         // Assignment updates scores, so restore to original state
 656         pcmk__rsc_debug(instance, "Rolling back node scores for %s",
 657                         instance->id);
 658         pcmk__restore_node_tables(instance, allowed_orig);
 659 
 660         if (chosen == NULL) {
 661             // Assignment failed, so give up
 662             pcmk__rsc_info(instance,
 663                            "Not assigning %s to current node %s: unavailable",
 664                            instance->id, pcmk__node_name(current));
 665             pcmk__set_rsc_flags(instance, pcmk__rsc_unassigned);
 666             break;
 667         }
 668 
 669         // We prefer more strongly to assign an instance to the chosen node
 670         pcmk__rsc_debug(instance,
 671                         "Not assigning %s to current node %s: %s is better",
 672                         instance->id, pcmk__node_name(current),
 673                         pcmk__node_name(chosen));
 674 
 675         // Reserve one instance for the chosen node and try again
 676         if (++reserved >= available) {
 677             pcmk__rsc_info(instance,
 678                            "Not assigning %s to current node %s: "
 679                            "other assignments are more important",
 680                            instance->id, pcmk__node_name(current));
 681 
 682         } else {
 683             pcmk__rsc_debug(instance,
 684                             "Reserved an instance of %s for %s. Retrying "
 685                             "assignment of %s to %s",
 686                             rsc->id, pcmk__node_name(chosen), instance->id,
 687                             pcmk__node_name(current));
 688         }
 689 
 690         // Clear this assignment (frees chosen); leave instance counts in parent
 691         pcmk__unassign_resource(instance);
 692         chosen = NULL;
 693     }
 694 
 695     g_hash_table_destroy(allowed_orig);
 696 
 697     // Restore original instances-per-node counts
 698     g_hash_table_destroy(parent->priv->allowed_nodes);
 699     parent->priv->allowed_nodes = allowed_orig_parent;
 700 
 701     if (chosen == NULL) {
 702         // Couldn't assign instance to current node
 703         return false;
 704     }
 705     pcmk__rsc_trace(instance, "Assigned %s to current node %s",
 706                     instance->id, pcmk__node_name(current));
 707     increment_parent_count(instance, chosen);
 708     return true;
 709 }
 710 
 711 /*!
 712  * \internal
 713  * \brief Reset the node counts of a resource's allowed nodes to zero
 714  *
 715  * \param[in,out] rsc  Resource to reset
 716  *
 717  * \return Number of nodes that are available to run resources
 718  */
 719 static unsigned int
 720 reset_allowed_node_counts(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 721 {
 722     unsigned int available_nodes = 0;
 723     pcmk_node_t *node = NULL;
 724     GHashTableIter iter;
 725 
 726     g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
 727     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 728         node->assign->count = 0;
 729         if (pcmk__node_available(node, false, false)) {
 730             available_nodes++;
 731         }
 732     }
 733     return available_nodes;
 734 }
 735 
 736 /*!
 737  * \internal
 738  * \brief Check whether an instance has a preferred node
 739  *
 740  * \param[in] instance          Clone instance or bundle replica container
 741  * \param[in] optimal_per_node  Optimal number of instances per node
 742  *
 743  * \return Instance's current node if still available, otherwise NULL
 744  */
 745 static const pcmk_node_t *
 746 preferred_node(const pcmk_resource_t *instance, int optimal_per_node)
     /* [previous][next][first][last][top][bottom][index][help] */
 747 {
 748     const pcmk_node_t *node = NULL;
 749     const pcmk_node_t *parent_node = NULL;
 750 
 751     // Check whether instance is active, healthy, and not yet assigned
 752     if ((instance->priv->active_nodes == NULL)
 753         || !pcmk_is_set(instance->flags, pcmk__rsc_unassigned)
 754         || pcmk_is_set(instance->flags, pcmk__rsc_failed)) {
 755         return NULL;
 756     }
 757 
 758     // Check whether instance's current node can run resources
 759     node = pcmk__current_node(instance);
 760     if (!pcmk__node_available(node, true, false)) {
 761         pcmk__rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
 762                         instance->id, pcmk__node_name(node));
 763         return NULL;
 764     }
 765 
 766     // Check whether node already has optimal number of instances assigned
 767     parent_node = pcmk__top_allowed_node(instance, node);
 768     if ((parent_node != NULL)
 769         && (parent_node->assign->count >= optimal_per_node)) {
 770         pcmk__rsc_trace(instance,
 771                         "Not assigning %s to %s early "
 772                         "(optimal instances already assigned)",
 773                         instance->id, pcmk__node_name(node));
 774         return NULL;
 775     }
 776 
 777     return node;
 778 }
 779 
 780 /*!
 781  * \internal
 782  * \brief Assign collective instances to nodes
 783  *
 784  * \param[in,out] collective    Clone or bundle resource being assigned
 785  * \param[in,out] instances     List of clone instances or bundle containers
 786  * \param[in]     max_total     Maximum instances to assign in total
 787  * \param[in]     max_per_node  Maximum instances to assign to any one node
 788  */
 789 void
 790 pcmk__assign_instances(pcmk_resource_t *collective, GList *instances,
     /* [previous][next][first][last][top][bottom][index][help] */
 791                        int max_total, int max_per_node)
 792 {
 793     // Reuse node count to track number of assigned instances
 794     unsigned int available_nodes = reset_allowed_node_counts(collective);
 795 
 796     int optimal_per_node = 0;
 797     int assigned = 0;
 798     GList *iter = NULL;
 799     pcmk_resource_t *instance = NULL;
 800     const pcmk_node_t *current = NULL;
 801 
 802     if (available_nodes > 0) {
 803         optimal_per_node = max_total / available_nodes;
 804     }
 805     if (optimal_per_node < 1) {
 806         optimal_per_node = 1;
 807     }
 808 
 809     pcmk__rsc_debug(collective,
 810                     "Assigning up to %d %s instance%s to up to %u node%s "
 811                     "(at most %d per host, %d optimal)",
 812                     max_total, collective->id, pcmk__plural_s(max_total),
 813                     available_nodes, pcmk__plural_s(available_nodes),
 814                     max_per_node, optimal_per_node);
 815 
 816     // Assign as many instances as possible to their current location
 817     for (iter = instances; (iter != NULL) && (assigned < max_total);
 818          iter = iter->next) {
 819         int available = max_total - assigned;
 820 
 821         instance = iter->data;
 822         if (!pcmk_is_set(instance->flags, pcmk__rsc_unassigned)) {
 823             continue;   // Already assigned
 824         }
 825 
 826         current = preferred_node(instance, optimal_per_node);
 827         if ((current != NULL)
 828             && assign_instance_early(collective, instance, current,
 829                                      max_per_node, available)) {
 830             assigned++;
 831         }
 832     }
 833 
 834     pcmk__rsc_trace(collective, "Assigned %d of %d instance%s to current node",
 835                     assigned, max_total, pcmk__plural_s(max_total));
 836 
 837     for (iter = instances; iter != NULL; iter = iter->next) {
 838         instance = (pcmk_resource_t *) iter->data;
 839 
 840         if (!pcmk_is_set(instance->flags, pcmk__rsc_unassigned)) {
 841             continue; // Already assigned
 842         }
 843 
 844         if (instance->priv->active_nodes != NULL) {
 845             current = pcmk__current_node(instance);
 846             if (pcmk__top_allowed_node(instance, current) == NULL) {
 847                 const char *unmanaged = "";
 848 
 849                 if (!pcmk_is_set(instance->flags, pcmk__rsc_managed)) {
 850                     unmanaged = "Unmanaged resource ";
 851                 }
 852                 crm_notice("%s%s is running on %s which is no longer allowed",
 853                            unmanaged, instance->id, pcmk__node_name(current));
 854             }
 855         }
 856 
 857         if (assigned >= max_total) {
 858             pcmk__rsc_debug(collective,
 859                             "Not assigning %s because maximum %d instances "
 860                             "already assigned",
 861                             instance->id, max_total);
 862             resource_location(instance, NULL, -PCMK_SCORE_INFINITY,
 863                               "collective_limit_reached",
 864                               collective->priv->scheduler);
 865 
 866         } else if (assign_instance(instance, NULL, max_per_node) != NULL) {
 867             assigned++;
 868         }
 869     }
 870 
 871     pcmk__rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
 872                     assigned, max_total, pcmk__plural_s(max_total),
 873                     collective->id);
 874 }
 875 
 876 enum instance_state {
 877     instance_starting   = (1 << 0),
 878     instance_stopping   = (1 << 1),
 879 
 880     /* This indicates that some instance is restarting. It's not the same as
 881      * instance_starting|instance_stopping, which would indicate that some
 882      * instance is starting, and some instance (not necessarily the same one) is
 883      * stopping.
 884      */
 885     instance_restarting = (1 << 2),
 886 
 887     instance_active     = (1 << 3),
 888 
 889     instance_all        = instance_starting|instance_stopping
 890                           |instance_restarting|instance_active,
 891 };
 892 
 893 /*!
 894  * \internal
 895  * \brief Check whether an instance is active, starting, and/or stopping
 896  *
 897  * \param[in]     instance  Clone instance or bundle replica container
 898  * \param[in,out] state     Whether any instance is starting, stopping, etc.
 899  */
 900 static void
 901 check_instance_state(const pcmk_resource_t *instance, uint32_t *state)
     /* [previous][next][first][last][top][bottom][index][help] */
 902 {
 903     const GList *iter = NULL;
 904     uint32_t instance_state = 0; // State of just this instance
 905 
 906     // No need to check further if all conditions have already been detected
 907     if (pcmk_all_flags_set(*state, instance_all)) {
 908         return;
 909     }
 910 
 911     // If instance is a collective (a cloned group), check its children instead
 912     if (instance->priv->variant > pcmk__rsc_variant_primitive) {
 913         for (iter = instance->priv->children;
 914              (iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
 915              iter = iter->next) {
 916             check_instance_state((const pcmk_resource_t *) iter->data, state);
 917         }
 918         return;
 919     }
 920 
 921     // If we get here, instance is a primitive
 922 
 923     if (instance->priv->active_nodes != NULL) {
 924         instance_state |= instance_active;
 925     }
 926 
 927     // Check each of the instance's actions for runnable start or stop
 928     for (iter = instance->priv->actions;
 929          (iter != NULL) && !pcmk_all_flags_set(instance_state,
 930                                                instance_starting
 931                                                |instance_stopping);
 932          iter = iter->next) {
 933 
 934         const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
 935         const bool optional = pcmk_is_set(action->flags, pcmk__action_optional);
 936 
 937         if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) {
 938             if (!optional
 939                 && pcmk_is_set(action->flags, pcmk__action_runnable)) {
 940 
 941                 pcmk__rsc_trace(instance, "Instance is starting due to %s",
 942                                 action->uuid);
 943                 instance_state |= instance_starting;
 944             } else {
 945                 pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)",
 946                                 action->uuid, instance->id,
 947                                 (optional? "optional" : "unrunnable"));
 948             }
 949 
 950         } else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task,
 951                                 pcmk__str_none)) {
 952             /* Only stop actions can be pseudo-actions for primitives. That
 953              * indicates that the node they are on is being fenced, so the stop
 954              * is implied rather than actually executed.
 955              */
 956             if (!optional
 957                 && pcmk_any_flags_set(action->flags, pcmk__action_pseudo
 958                                                      |pcmk__action_runnable)) {
 959                 pcmk__rsc_trace(instance, "Instance is stopping due to %s",
 960                                 action->uuid);
 961                 instance_state |= instance_stopping;
 962             } else {
 963                 pcmk__rsc_trace(instance, "%s doesn't affect %s state (%s)",
 964                                 action->uuid, instance->id,
 965                                 (optional? "optional" : "unrunnable"));
 966             }
 967         }
 968     }
 969 
 970     if (pcmk_all_flags_set(instance_state,
 971                            instance_starting|instance_stopping)) {
 972         instance_state |= instance_restarting;
 973     }
 974     *state |= instance_state;
 975 }
 976 
 977 /*!
 978  * \internal
 979  * \brief Create actions for collective resource instances
 980  *
 981  * \param[in,out] collective    Clone or bundle resource to create actions for
 982  * \param[in,out] instances     List of clone instances or bundle containers
 983  */
 984 void
 985 pcmk__create_instance_actions(pcmk_resource_t *collective, GList *instances)
     /* [previous][next][first][last][top][bottom][index][help] */
 986 {
 987     uint32_t state = 0;
 988 
 989     pcmk_action_t *stop = NULL;
 990     pcmk_action_t *stopped = NULL;
 991 
 992     pcmk_action_t *start = NULL;
 993     pcmk_action_t *started = NULL;
 994 
 995     pcmk__rsc_trace(collective, "Creating collective instance actions for %s",
 996                     collective->id);
 997 
 998     // Create actions for each instance appropriate to its variant
 999     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1000         pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1001 
1002         instance->priv->cmds->create_actions(instance);
1003         check_instance_state(instance, &state);
1004     }
1005 
1006     // Create pseudo-actions for rsc start and started
1007     start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START,
1008                                       !pcmk_is_set(state, instance_starting),
1009                                       true);
1010     started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING,
1011                                         !pcmk_is_set(state, instance_starting),
1012                                         false);
1013     started->priority = PCMK_SCORE_INFINITY;
1014     if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
1015         pcmk__set_action_flags(started, pcmk__action_runnable);
1016     }
1017 
1018     // Create pseudo-actions for rsc stop and stopped
1019     stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP,
1020                                      !pcmk_is_set(state, instance_stopping),
1021                                      true);
1022     stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED,
1023                                         !pcmk_is_set(state, instance_stopping),
1024                                         true);
1025     stopped->priority = PCMK_SCORE_INFINITY;
1026     if (!pcmk_is_set(state, instance_restarting)) {
1027         pcmk__set_action_flags(stop, pcmk__action_migratable);
1028     }
1029 
1030     if (pcmk__is_clone(collective)) {
1031         pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
1032                                           stopped);
1033     }
1034 }
1035 
1036 /*!
1037  * \internal
1038  * \brief Get a list of clone instances or bundle replica containers
1039  *
1040  * \param[in] rsc  Clone or bundle resource
1041  *
1042  * \return Clone instances if \p rsc is a clone, or a newly created list of
1043  *         \p rsc's replica containers if \p rsc is a bundle
1044  * \note The caller must call free_instance_list() on the result when the list
1045  *       is no longer needed.
1046  */
1047 static inline GList *
1048 get_instance_list(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1049 {
1050     if (pcmk__is_bundle(rsc)) {
1051         return pe__bundle_containers(rsc);
1052     } else {
1053         return rsc->priv->children;
1054     }
1055 }
1056 
1057 /*!
1058  * \internal
1059  * \brief Free any memory created by get_instance_list()
1060  *
1061  * \param[in]     rsc   Clone or bundle resource passed to get_instance_list()
1062  * \param[in,out] list  Return value of get_instance_list() for \p rsc
1063  */
1064 static inline void
1065 free_instance_list(const pcmk_resource_t *rsc, GList *list)
     /* [previous][next][first][last][top][bottom][index][help] */
1066 {
1067     if (list != rsc->priv->children) {
1068         g_list_free(list);
1069     }
1070 }
1071 
1072 /*!
1073  * \internal
1074  * \brief Check whether an instance is compatible with a role and node
1075  *
1076  * \param[in] instance  Clone instance or bundle replica container
1077  * \param[in] node      Instance must match this node
1078  * \param[in] role      If not pcmk_role_unknown, instance must match this role
1079  * \param[in] current   If true, compare instance's original node and role,
1080  *                      otherwise compare assigned next node and role
1081  *
1082  * \return true if \p instance is compatible with \p node and \p role,
1083  *         otherwise false
1084  */
1085 bool
1086 pcmk__instance_matches(const pcmk_resource_t *instance, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
1087                        enum rsc_role_e role, bool current)
1088 {
1089     pcmk_node_t *instance_node = NULL;
1090 
1091     CRM_CHECK((instance != NULL) && (node != NULL), return false);
1092 
1093     if ((role != pcmk_role_unknown)
1094         && (role != instance->priv->fns->state(instance, current))) {
1095         pcmk__rsc_trace(instance,
1096                         "%s is not a compatible instance (role is not %s)",
1097                         instance->id, pcmk_role_text(role));
1098         return false;
1099     }
1100 
1101     if (!is_set_recursive(instance, pcmk__rsc_blocked, true)) {
1102         uint32_t target = pcmk__rsc_node_assigned;
1103 
1104         if (current) {
1105             target = pcmk__rsc_node_current;
1106         }
1107 
1108         // We only want instances that haven't failed
1109         instance_node = instance->priv->fns->location(instance, NULL, target);
1110     }
1111 
1112     if (instance_node == NULL) {
1113         pcmk__rsc_trace(instance,
1114                         "%s is not a compatible instance "
1115                         "(not assigned to a node)",
1116                         instance->id);
1117         return false;
1118     }
1119 
1120     if (!pcmk__same_node(instance_node, node)) {
1121         pcmk__rsc_trace(instance,
1122                         "%s is not a compatible instance "
1123                         "(assigned to %s not %s)",
1124                         instance->id, pcmk__node_name(instance_node),
1125                         pcmk__node_name(node));
1126         return false;
1127     }
1128 
1129     return true;
1130 }
1131 
// Role text for log messages ("matching" when no particular role is required)
#define display_role(r) \
    (((r) != pcmk_role_unknown)? pcmk_role_text(r) : "matching")
1134 
1135 /*!
1136  * \internal
1137  * \brief Find an instance that matches a given resource by node and role
1138  *
1139  * \param[in] match_rsc  Resource that instance must match (for logging only)
1140  * \param[in] rsc        Clone or bundle resource to check for matching instance
1141  * \param[in] node       Instance must match this node
1142  * \param[in] role       If not pcmk_role_unknown, instance must match this role
1143  * \param[in] current    If true, compare instance's original node and role,
1144  *                       otherwise compare assigned next node and role
1145  *
1146  * \return \p rsc instance matching \p node and \p role if any, otherwise NULL
1147  */
1148 static pcmk_resource_t *
1149 find_compatible_instance_on_node(const pcmk_resource_t *match_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1150                                  const pcmk_resource_t *rsc,
1151                                  const pcmk_node_t *node, enum rsc_role_e role,
1152                                  bool current)
1153 {
1154     GList *instances = NULL;
1155 
1156     instances = get_instance_list(rsc);
1157     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1158         pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1159 
1160         if (pcmk__instance_matches(instance, node, role, current)) {
1161             pcmk__rsc_trace(match_rsc,
1162                             "Found %s %s instance %s compatible with %s on %s",
1163                             display_role(role), rsc->id, instance->id,
1164                             match_rsc->id, pcmk__node_name(node));
1165             free_instance_list(rsc, instances); // Only frees list, not contents
1166             return instance;
1167         }
1168     }
1169     free_instance_list(rsc, instances);
1170 
1171     pcmk__rsc_trace(match_rsc,
1172                     "No %s %s instance found compatible with %s on %s",
1173                     display_role(role), rsc->id, match_rsc->id,
1174                     pcmk__node_name(node));
1175     return NULL;
1176 }
1177 
1178 /*!
1179  * \internal
1180  * \brief Find a clone instance or bundle container compatible with a resource
1181  *
1182  * \param[in] match_rsc  Resource that instance must match
1183  * \param[in] rsc        Clone or bundle resource to check for matching instance
1184  * \param[in] role       If not pcmk_role_unknown, instance must match this role
1185  * \param[in] current    If true, compare instance's original node and role,
1186  *                       otherwise compare assigned next node and role
1187  *
1188  * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
1189  *         if any, otherwise NULL
1190  */
1191 pcmk_resource_t *
1192 pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1193                                const pcmk_resource_t *rsc, enum rsc_role_e role,
1194                                bool current)
1195 {
1196     pcmk_resource_t *instance = NULL;
1197     GList *nodes = NULL;
1198     const pcmk_node_t *node = NULL;
1199     GHashTable *allowed_nodes = match_rsc->priv->allowed_nodes;
1200     uint32_t target = pcmk__rsc_node_assigned;
1201 
1202     if (current) {
1203         target = pcmk__rsc_node_current;
1204     }
1205 
1206     // If match_rsc has a node, check only that node
1207     node = match_rsc->priv->fns->location(match_rsc, NULL, target);
1208     if (node != NULL) {
1209         return find_compatible_instance_on_node(match_rsc, rsc, node, role,
1210                                                 current);
1211     }
1212 
1213     // Otherwise check for an instance matching any of match_rsc's allowed nodes
1214     nodes = pcmk__sort_nodes(g_hash_table_get_values(allowed_nodes), NULL);
1215     for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
1216          iter = iter->next) {
1217         instance = find_compatible_instance_on_node(match_rsc, rsc,
1218                                                     (pcmk_node_t *) iter->data,
1219                                                     role, current);
1220     }
1221 
1222     if (instance == NULL) {
1223         pcmk__rsc_debug(rsc, "No %s instance found compatible with %s",
1224                         rsc->id, match_rsc->id);
1225     }
1226     g_list_free(nodes);
1227     return instance;
1228 }
1229 
1230 /*!
1231  * \internal
1232  * \brief Unassign an instance if mandatory ordering has no interleave match
1233  *
1234  * \param[in]     first          'First' action in an ordering
1235  * \param[in]     then           'Then' action in an ordering
1236  * \param[in,out] then_instance  'Then' instance that has no interleave match
1237  * \param[in]     type           Group of enum pcmk__action_relation_flags
1238  * \param[in]     current        If true, "then" action is stopped or demoted
1239  *
1240  * \return true if \p then_instance was unassigned, otherwise false
1241  */
1242 static bool
1243 unassign_if_mandatory(const pcmk_action_t *first, const pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1244                       pcmk_resource_t *then_instance, uint32_t type,
1245                       bool current)
1246 {
1247     // Allow "then" instance to go down even without an interleave match
1248     if (current) {
1249         pcmk__rsc_trace(then->rsc,
1250                         "%s has no instance to order before stopping "
1251                         "or demoting %s",
1252                         first->rsc->id, then_instance->id);
1253 
1254     /* If the "first" action must be runnable, but there is no "first"
1255      * instance, the "then" instance must not be allowed to come up.
1256      */
1257     } else if (pcmk_any_flags_set(type, pcmk__ar_unrunnable_first_blocks
1258                                         |pcmk__ar_first_implies_then)) {
1259         pcmk__rsc_info(then->rsc,
1260                        "Inhibiting %s from being active "
1261                        "because there is no %s instance to interleave",
1262                        then_instance->id, first->rsc->id);
1263         return pcmk__assign_resource(then_instance, NULL, true, true);
1264     }
1265     return false;
1266 }
1267 
1268 /*!
1269  * \internal
1270  * \brief Find first matching action for a clone instance or bundle container
1271  *
1272  * \param[in] action       Action in an interleaved ordering
1273  * \param[in] instance     Clone instance or bundle container being interleaved
1274  * \param[in] action_name  Action to look for
1275  * \param[in] node         If not NULL, require action to be on this node
1276  * \param[in] for_first    If true, \p instance is the 'first' resource in the
1277  *                         ordering, otherwise it is the 'then' resource
1278  *
1279  * \return First action for \p instance (or in some cases if \p instance is a
1280  *         bundle container, its containerized resource) that matches
1281  *         \p action_name and \p node if any, otherwise NULL
1282  */
1283 static pcmk_action_t *
1284 find_instance_action(const pcmk_action_t *action, const pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
1285                      const char *action_name, const pcmk_node_t *node,
1286                      bool for_first)
1287 {
1288     const pcmk_resource_t *rsc = NULL;
1289     pcmk_action_t *matching_action = NULL;
1290 
1291     /* If instance is a bundle container, sometimes we should interleave the
1292      * action for the container itself, and sometimes for the containerized
1293      * resource.
1294      *
1295      * For example, given "start bundle A then bundle B", B likely requires the
1296      * service inside A's container to be active, rather than just the
1297      * container, so we should interleave the action for A's containerized
1298      * resource. On the other hand, it's possible B's container itself requires
1299      * something from A, so we should interleave the action for B's container.
1300      *
1301      * Essentially, for 'first', we should use the containerized resource for
1302      * everything except stop, and for 'then', we should use the container for
1303      * everything except promote and demote (which can only be performed on the
1304      * containerized resource).
1305      */
1306     if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP,
1307                                         PCMK_ACTION_STOPPED, NULL))
1308 
1309         || (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE,
1310                                            PCMK_ACTION_PROMOTED,
1311                                            PCMK_ACTION_DEMOTE,
1312                                            PCMK_ACTION_DEMOTED, NULL))) {
1313 
1314         rsc = pe__get_rsc_in_container(instance);
1315     }
1316     if (rsc == NULL) {
1317         rsc = instance; // No containerized resource, use instance itself
1318     } else {
1319         node = NULL; // Containerized actions are on bundle-created guest
1320     }
1321 
1322     matching_action = find_first_action(rsc->priv->actions, NULL,
1323                                         action_name, node);
1324     if (matching_action != NULL) {
1325         return matching_action;
1326     }
1327 
1328     if (pcmk_is_set(instance->flags, pcmk__rsc_removed)
1329         || pcmk__str_any_of(action_name, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE,
1330                             NULL)) {
1331         crm_trace("No %s action found for %s%s",
1332                   action_name,
1333                   pcmk_is_set(instance->flags, pcmk__rsc_removed)? "orphan " : "",
1334                   instance->id);
1335     } else {
1336         crm_err("No %s action found for %s to interleave (bug?)",
1337                 action_name, instance->id);
1338     }
1339     return NULL;
1340 }
1341 
1342 /*!
1343  * \internal
1344  * \brief Get the original action name of a bundle or clone action
1345  *
1346  * Given an action for a bundle or clone, get the original action name,
1347  * mapping notify to the action being notified, and if the instances are
1348  * primitives, mapping completion actions to the action that was completed
1349  * (for example, stopped to stop).
1350  *
1351  * \param[in] action  Clone or bundle action to check
1352  *
1353  * \return Original action name for \p action
1354  */
1355 static const char *
1356 orig_action_name(const pcmk_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
1357 {
1358     // Any instance will do
1359     const pcmk_resource_t *instance = action->rsc->priv->children->data;
1360 
1361     char *action_type = NULL;
1362     const char *action_name = action->task;
1363     enum pcmk__action_type orig_task = pcmk__action_unspecified;
1364 
1365     if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
1366                              PCMK_ACTION_NOTIFIED, NULL)) {
1367         // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
1368         CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
1369                   return pcmk__action_text(pcmk__action_unspecified));
1370         action_name = strstr(action_type, "_notify_");
1371         CRM_CHECK(action_name != NULL,
1372                   return pcmk__action_text(pcmk__action_unspecified));
1373         action_name += strlen("_notify_");
1374     }
1375     orig_task = get_complex_task(instance, action_name);
1376     free(action_type);
1377     return pcmk__action_text(orig_task);
1378 }
1379 
1380 /*!
1381  * \internal
1382  * \brief Update two interleaved actions according to an ordering between them
1383  *
1384  * Given information about an ordering of two interleaved actions, update the
1385  * actions' flags (and runnable_before members if appropriate) as appropriate
1386  * for the ordering. Effects may cascade to other orderings involving the
1387  * actions as well.
1388  *
1389  * \param[in,out] first     'First' action in an ordering
1390  * \param[in,out] then      'Then' action in an ordering
1391  * \param[in]     node      If not NULL, limit scope of ordering to this node
1392  * \param[in]     filter    Action flags to limit scope of certain updates (may
1393  *                          include pcmk__action_optional to affect only
1394  *                          mandatory actions, and pcmk__action_runnable to
1395  *                          affect only runnable actions)
1396  * \param[in]     type      Group of enum pcmk__action_relation_flags to apply
1397  *
1398  * \return Group of enum pcmk__updated flags indicating what was updated
1399  */
1400 static uint32_t
1401 update_interleaved_actions(pcmk_action_t *first, pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1402                            const pcmk_node_t *node, uint32_t filter,
1403                            uint32_t type)
1404 {
1405     GList *instances = NULL;
1406     uint32_t changed = pcmk__updated_none;
1407     const char *orig_first_task = orig_action_name(first);
1408 
1409     // Stops and demotes must be interleaved with instance on current node
1410     bool current = pcmk__ends_with(first->uuid, "_" PCMK_ACTION_STOPPED "_0")
1411                    || pcmk__ends_with(first->uuid,
1412                                       "_" PCMK_ACTION_DEMOTED "_0");
1413 
1414     // Update the specified actions for each "then" instance individually
1415     instances = get_instance_list(then->rsc);
1416     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1417         pcmk_resource_t *first_instance = NULL;
1418         pcmk_resource_t *then_instance = iter->data;
1419 
1420         pcmk_action_t *first_action = NULL;
1421         pcmk_action_t *then_action = NULL;
1422 
1423         // Find a "first" instance to interleave with this "then" instance
1424         first_instance = pcmk__find_compatible_instance(then_instance,
1425                                                         first->rsc,
1426                                                         pcmk_role_unknown,
1427                                                         current);
1428 
1429         if (first_instance == NULL) { // No instance can be interleaved
1430             if (unassign_if_mandatory(first, then, then_instance, type,
1431                                       current)) {
1432                 pcmk__set_updated_flags(changed, first, pcmk__updated_then);
1433             }
1434             continue;
1435         }
1436 
1437         first_action = find_instance_action(first, first_instance,
1438                                             orig_first_task, node, true);
1439         if (first_action == NULL) {
1440             continue;
1441         }
1442 
1443         then_action = find_instance_action(then, then_instance, then->task,
1444                                            node, false);
1445         if (then_action == NULL) {
1446             continue;
1447         }
1448 
1449         if (order_actions(first_action, then_action, type)) {
1450             pcmk__set_updated_flags(changed, first,
1451                                     pcmk__updated_first|pcmk__updated_then);
1452         }
1453 
1454         changed |= then_instance->priv->cmds->update_ordered_actions(
1455             first_action, then_action, node,
1456             first_instance->priv->cmds->action_flags(first_action, node),
1457             filter, type, then->rsc->priv->scheduler);
1458     }
1459     free_instance_list(then->rsc, instances);
1460     return changed;
1461 }
1462 
1463 /*!
1464  * \internal
1465  * \brief Check whether two actions in an ordering can be interleaved
1466  *
1467  * \param[in] first  'First' action in the ordering
1468  * \param[in] then   'Then' action in the ordering
1469  *
1470  * \return true if \p first and \p then can be interleaved, otherwise false
1471  */
1472 static bool
1473 can_interleave_actions(const pcmk_action_t *first, const pcmk_action_t *then)
     /* [previous][next][first][last][top][bottom][index][help] */
1474 {
1475     bool interleave = false;
1476     pcmk_resource_t *rsc = NULL;
1477 
1478     if ((first->rsc == NULL) || (then->rsc == NULL)) {
1479         crm_trace("Not interleaving %s with %s: not resource actions",
1480                   first->uuid, then->uuid);
1481         return false;
1482     }
1483 
1484     if (first->rsc == then->rsc) {
1485         crm_trace("Not interleaving %s with %s: same resource",
1486                   first->uuid, then->uuid);
1487         return false;
1488     }
1489 
1490     if ((first->rsc->priv->variant < pcmk__rsc_variant_clone)
1491         || (then->rsc->priv->variant < pcmk__rsc_variant_clone)) {
1492         crm_trace("Not interleaving %s with %s: not clones or bundles",
1493                   first->uuid, then->uuid);
1494         return false;
1495     }
1496 
1497     if (pcmk__ends_with(then->uuid, "_stop_0")
1498         || pcmk__ends_with(then->uuid, "_demote_0")) {
1499         rsc = first->rsc;
1500     } else {
1501         rsc = then->rsc;
1502     }
1503 
1504     interleave = crm_is_true(g_hash_table_lookup(rsc->priv->meta,
1505                                                  PCMK_META_INTERLEAVE));
1506     pcmk__rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
1507                     first->uuid, then->uuid, (interleave? "" : "not "),
1508                     rsc->id);
1509     return interleave;
1510 }
1511 
1512 /*!
1513  * \internal
1514  * \brief Update non-interleaved instance actions according to an ordering
1515  *
1516  * Given information about an ordering of two non-interleaved actions, update
1517  * the actions' flags (and runnable_before members if appropriate) as
1518  * appropriate for the ordering. Effects may cascade to other orderings
1519  * involving the actions as well.
1520  *
1521  * \param[in,out] instance  Clone instance or bundle container
1522  * \param[in,out] first     "First" action in ordering
1523  * \param[in]     then      "Then" action in ordering (for \p instance's parent)
1524  * \param[in]     node      If not NULL, limit scope of ordering to this node
1525  * \param[in]     flags     Action flags for \p first for ordering purposes
1526  * \param[in]     filter    Action flags to limit scope of certain updates (may
1527  *                          include pcmk__action_optional to affect only
1528  *                          mandatory actions, and pcmk__action_runnable to
1529  *                          affect only runnable actions)
1530  * \param[in]     type      Group of enum pcmk__action_relation_flags to apply
1531  *
1532  * \return Group of enum pcmk__updated flags indicating what was updated
1533  */
1534 static uint32_t
1535 update_noninterleaved_actions(pcmk_resource_t *instance, pcmk_action_t *first,
     /* [previous][next][first][last][top][bottom][index][help] */
1536                               const pcmk_action_t *then, const pcmk_node_t *node,
1537                               uint32_t flags, uint32_t filter, uint32_t type)
1538 {
1539     pcmk_action_t *instance_action = NULL;
1540     pcmk_scheduler_t *scheduler = instance->priv->scheduler;
1541     uint32_t instance_flags = 0;
1542     uint32_t changed = pcmk__updated_none;
1543 
1544     // Check whether instance has an equivalent of "then" action
1545     instance_action = find_first_action(instance->priv->actions, NULL,
1546                                         then->task, node);
1547     if (instance_action == NULL) {
1548         return changed;
1549     }
1550 
1551     // Check whether action is runnable
1552     instance_flags = instance->priv->cmds->action_flags(instance_action, node);
1553     if (!pcmk_is_set(instance_flags, pcmk__action_runnable)) {
1554         return changed;
1555     }
1556 
1557     // If so, update actions for the instance
1558     changed = instance->priv->cmds->update_ordered_actions(first,
1559                                                            instance_action,
1560                                                            node, flags, filter,
1561                                                            type, scheduler);
1562 
1563     // Propagate any changes to later actions
1564     if (pcmk_is_set(changed, pcmk__updated_then)) {
1565         for (GList *after_iter = instance_action->actions_after;
1566              after_iter != NULL; after_iter = after_iter->next) {
1567             pcmk__related_action_t *after = after_iter->data;
1568 
1569             pcmk__update_action_for_orderings(after->action, scheduler);
1570         }
1571     }
1572 
1573     return changed;
1574 }
1575 
1576 /*!
1577  * \internal
1578  * \brief Update two actions according to an ordering between them
1579  *
1580  * Given information about an ordering of two clone or bundle actions, update
1581  * the actions' flags (and runnable_before members if appropriate) as
1582  * appropriate for the ordering. Effects may cascade to other orderings
1583  * involving the actions as well.
1584  *
1585  * \param[in,out] first      'First' action in an ordering
1586  * \param[in,out] then       'Then' action in an ordering
1587  * \param[in]     node       If not NULL, limit scope of ordering to this node
1588  *                           (only used when interleaving instances)
1589  * \param[in]     flags      Action flags for \p first for ordering purposes
1590  * \param[in]     filter     Action flags to limit scope of certain updates (may
1591  *                           include pcmk__action_optional to affect only
1592  *                           mandatory actions, and pcmk__action_runnable to
1593  *                           affect only runnable actions)
1594  * \param[in]     type       Group of enum pcmk__action_relation_flags to apply
1595  * \param[in,out] scheduler  Scheduler data
1596  *
1597  * \return Group of enum pcmk__updated flags indicating what was updated
1598  */
1599 uint32_t
1600 pcmk__instance_update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1601                                       const pcmk_node_t *node, uint32_t flags,
1602                                       uint32_t filter, uint32_t type,
1603                                       pcmk_scheduler_t *scheduler)
1604 {
1605     pcmk__assert((first != NULL) && (then != NULL) && (scheduler != NULL));
1606 
1607     if (then->rsc == NULL) {
1608         return pcmk__updated_none;
1609 
1610     } else if (can_interleave_actions(first, then)) {
1611         return update_interleaved_actions(first, then, node, filter, type);
1612 
1613     } else {
1614         uint32_t changed = pcmk__updated_none;
1615         GList *instances = get_instance_list(then->rsc);
1616 
1617         // Update actions for the clone or bundle resource itself
1618         changed |= pcmk__update_ordered_actions(first, then, node, flags,
1619                                                 filter, type, scheduler);
1620 
1621         // Update the 'then' clone instances or bundle containers individually
1622         for (GList *iter = instances; iter != NULL; iter = iter->next) {
1623             pcmk_resource_t *instance = iter->data;
1624 
1625             changed |= update_noninterleaved_actions(instance, first, then,
1626                                                      node, flags, filter, type);
1627         }
1628         free_instance_list(then->rsc, instances);
1629         return changed;
1630     }
1631 }
1632 
/* Clear \p flag from the flag group \p flags (an lvalue that is updated in
 * place), logging the change at trace level against the ID of \p action's
 * resource. Every argument use is parenthesized so that arbitrary expressions
 * (for example "ptr + 1" for \p action) expand as intended; \p flag is also
 * stringified for the log message.
 */
#define pe__clear_action_summary_flags(flags, action, flag) do {          \
        (flags) = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                       "Action summary",                  \
                                       (action)->rsc->id,                 \
                                       (flags), (flag), #flag);           \
    } while (0)
1638 
1639 /*!
1640  * \internal
1641  * \brief Return action flags for a given clone or bundle action
1642  *
1643  * \param[in,out] action     Action for a clone or bundle
1644  * \param[in]     instances  Clone instances or bundle containers
1645  * \param[in]     node       If not NULL, limit effects to this node
1646  *
1647  * \return Flags appropriate to \p action on \p node
1648  */
1649 uint32_t
1650 pcmk__collective_action_flags(pcmk_action_t *action, const GList *instances,
     /* [previous][next][first][last][top][bottom][index][help] */
1651                               const pcmk_node_t *node)
1652 {
1653     bool any_runnable = false;
1654     const char *action_name = orig_action_name(action);
1655 
1656     // Set original assumptions (optional and runnable may be cleared below)
1657     uint32_t flags = pcmk__action_optional
1658                      |pcmk__action_runnable
1659                      |pcmk__action_pseudo;
1660 
1661     for (const GList *iter = instances; iter != NULL; iter = iter->next) {
1662         const pcmk_resource_t *instance = iter->data;
1663         const pcmk_node_t *instance_node = NULL;
1664         pcmk_action_t *instance_action = NULL;
1665         uint32_t instance_flags;
1666 
1667         // Node is relevant only to primitive instances
1668         if (pcmk__is_primitive(instance)) {
1669             instance_node = node;
1670         }
1671 
1672         instance_action = find_first_action(instance->priv->actions, NULL,
1673                                             action_name, instance_node);
1674         if (instance_action == NULL) {
1675             pcmk__rsc_trace(action->rsc, "%s has no %s action on %s",
1676                             instance->id, action_name, pcmk__node_name(node));
1677             continue;
1678         }
1679 
1680         pcmk__rsc_trace(action->rsc, "%s has %s for %s on %s",
1681                         instance->id, instance_action->uuid, action_name,
1682                         pcmk__node_name(node));
1683 
1684         instance_flags = instance->priv->cmds->action_flags(instance_action,
1685                                                             node);
1686 
1687         // If any instance action is mandatory, so is the collective action
1688         if (pcmk_is_set(flags, pcmk__action_optional)
1689             && !pcmk_is_set(instance_flags, pcmk__action_optional)) {
1690             pcmk__rsc_trace(instance, "%s is mandatory because %s is",
1691                             action->uuid, instance_action->uuid);
1692             pe__clear_action_summary_flags(flags, action,
1693                                            pcmk__action_optional);
1694             pcmk__clear_action_flags(action, pcmk__action_optional);
1695         }
1696 
1697         // If any instance action is runnable, so is the collective action
1698         if (pcmk_is_set(instance_flags, pcmk__action_runnable)) {
1699             any_runnable = true;
1700         }
1701     }
1702 
1703     if (!any_runnable) {
1704         pcmk__rsc_trace(action->rsc,
1705                         "%s is not runnable because no instance can run %s",
1706                         action->uuid, action_name);
1707         pe__clear_action_summary_flags(flags, action, pcmk__action_runnable);
1708         if (node == NULL) {
1709             pcmk__clear_action_flags(action, pcmk__action_runnable);
1710         }
1711     }
1712 
1713     return flags;
1714 }

/* [previous][next][first][last][top][bottom][index][help] */