root/lib/pacemaker/pcmk_sched_primitive.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sorted_allowed_nodes
  2. assign_best_node
  3. apply_this_with
  4. remote_connection_assigned
  5. pcmk__primitive_assign
  6. schedule_restart_actions
  7. set_default_next_role
  8. create_pending_start
  9. schedule_role_transition_actions
  10. pcmk__primitive_create_actions
  11. rsc_avoids_remote_nodes
  12. allowed_nodes_as_list
  13. pcmk__primitive_internal_constraints
  14. pcmk__primitive_apply_coloc_score
  15. pcmk__with_primitive_colocations
  16. pcmk__primitive_with_colocations
  17. pcmk__primitive_action_flags
  18. is_expected_node
  19. stop_resource
  20. start_resource
  21. promote_resource
  22. demote_resource
  23. assert_role_error
  24. pcmk__schedule_cleanup
  25. pcmk__primitive_add_graph_meta
  26. pcmk__primitive_add_utilization
  27. shutdown_time
  28. ban_if_not_locked
  29. pcmk__primitive_shutdown_lock

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <stdbool.h>
  13 #include <stdint.h>                 // uint8_t, uint32_t
  14 
  15 #include <crm/common/xml.h>
  16 #include <pacemaker-internal.h>
  17 
  18 #include "libpacemaker_private.h"
  19 
  20 static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  21                           bool optional);
  22 static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  23                            bool optional);
  24 static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  25                             bool optional);
  26 static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
  27                              bool optional);
  28 static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node,
  29                               bool optional);
  30 
  31 #define RSC_ROLE_MAX    (pcmk_role_promoted + 1)
  32 
  33 static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
  34     /* This array lists the immediate next role when transitioning from one role
  35      * to a target role. For example, when going from Stopped to Promoted, the
  36      * next role is Unpromoted, because the resource must be started before it
  37      * can be promoted. The current state then becomes Started, which is fed
  38      * into this array again, giving a next role of Promoted.
  39      *
  40      * Current role       Immediate next role   Final target role
  41      * ------------       -------------------   -----------------
  42      */
  43     /* Unknown */       { pcmk_role_unknown,    /* Unknown */
  44                           pcmk_role_stopped,    /* Stopped */
  45                           pcmk_role_stopped,    /* Started */
  46                           pcmk_role_stopped,    /* Unpromoted */
  47                           pcmk_role_stopped,    /* Promoted */
  48                         },
  49     /* Stopped */       { pcmk_role_stopped,    /* Unknown */
  50                           pcmk_role_stopped,    /* Stopped */
  51                           pcmk_role_started,    /* Started */
  52                           pcmk_role_unpromoted, /* Unpromoted */
  53                           pcmk_role_unpromoted, /* Promoted */
  54                         },
  55     /* Started */       { pcmk_role_stopped,    /* Unknown */
  56                           pcmk_role_stopped,    /* Stopped */
  57                           pcmk_role_started,    /* Started */
  58                           pcmk_role_unpromoted, /* Unpromoted */
  59                           pcmk_role_promoted,   /* Promoted */
  60                         },
  61     /* Unpromoted */    { pcmk_role_stopped,    /* Unknown */
  62                           pcmk_role_stopped,    /* Stopped */
  63                           pcmk_role_stopped,    /* Started */
  64                           pcmk_role_unpromoted, /* Unpromoted */
  65                           pcmk_role_promoted,   /* Promoted */
  66                         },
  67     /* Promoted  */     { pcmk_role_stopped,    /* Unknown */
  68                           pcmk_role_unpromoted, /* Stopped */
  69                           pcmk_role_unpromoted, /* Started */
  70                           pcmk_role_unpromoted, /* Unpromoted */
  71                           pcmk_role_promoted,   /* Promoted */
  72                         },
  73 };
  74 
  75 /*!
  76  * \internal
  77  * \brief Function to schedule actions needed for a role change
  78  *
  79  * \param[in,out] rsc       Resource whose role is changing
  80  * \param[in,out] node      Node where resource will be in its next role
  81  * \param[in]     optional  Whether scheduled actions should be optional
  82  */
  83 typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node,
  84                                   bool optional);
  85 
  86 static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
  87     /* This array lists the function needed to transition directly from one role
  88      * to another. NULL indicates that nothing is needed.
  89      *
  90      * Current role         Transition function             Next role
  91      * ------------         -------------------             ----------
  92      */
  93     /* Unknown */       {   assert_role_error,              /* Unknown */
  94                             stop_resource,                  /* Stopped */
  95                             assert_role_error,              /* Started */
  96                             assert_role_error,              /* Unpromoted */
  97                             assert_role_error,              /* Promoted */
  98                         },
  99     /* Stopped */       {   assert_role_error,              /* Unknown */
 100                             NULL,                           /* Stopped */
 101                             start_resource,                 /* Started */
 102                             start_resource,                 /* Unpromoted */
 103                             assert_role_error,              /* Promoted */
 104                         },
 105     /* Started */       {   assert_role_error,              /* Unknown */
 106                             stop_resource,                  /* Stopped */
 107                             NULL,                           /* Started */
 108                             NULL,                           /* Unpromoted */
 109                             promote_resource,               /* Promoted */
 110                         },
 111     /* Unpromoted */    {   assert_role_error,              /* Unknown */
 112                             stop_resource,                  /* Stopped */
 113                             stop_resource,                  /* Started */
 114                             NULL,                           /* Unpromoted */
 115                             promote_resource,               /* Promoted */
 116                         },
 117     /* Promoted  */     {   assert_role_error,              /* Unknown */
 118                             demote_resource,                /* Stopped */
 119                             demote_resource,                /* Started */
 120                             demote_resource,                /* Unpromoted */
 121                             NULL,                           /* Promoted */
 122                         },
 123 };
 124 
 125 /*!
 126  * \internal
 127  * \brief Get a list of a resource's allowed nodes sorted by node score
 128  *
 129  * \param[in] rsc  Resource to check
 130  *
 131  * \return List of allowed nodes sorted by node score
 132  */
 133 static GList *
 134 sorted_allowed_nodes(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 135 {
 136     if (rsc->allowed_nodes != NULL) {
 137         GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);
 138 
 139         if (nodes != NULL) {
 140             return pcmk__sort_nodes(nodes, pcmk__current_node(rsc));
 141         }
 142     }
 143     return NULL;
 144 }
 145 
 146 /*!
 147  * \internal
 148  * \brief Assign a resource to its best allowed node, if possible
 149  *
 150  * \param[in,out] rsc           Resource to choose a node for
 151  * \param[in]     prefer        If not \c NULL, prefer this node when all else
 152  *                              equal
 153  * \param[in]     stop_if_fail  If \c true and \p rsc can't be assigned to a
 154  *                              node, set next role to stopped and update
 155  *                              existing actions
 156  *
 157  * \return true if \p rsc could be assigned to a node, otherwise false
 158  *
 159  * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
 160  *       completely undo the assignment. A successful assignment can be either
 161  *       undone or left alone as final. A failed assignment has the same effect
 162  *       as calling pcmk__unassign_resource(); there are no side effects on
 163  *       roles or actions.
 164  */
 165 static bool
 166 assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
     /* [previous][next][first][last][top][bottom][index][help] */
 167                  bool stop_if_fail)
 168 {
 169     GList *nodes = NULL;
 170     pcmk_node_t *chosen = NULL;
 171     pcmk_node_t *best = NULL;
 172     const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
 173 
 174     if (prefer == NULL) {
 175         prefer = most_free_node;
 176     }
 177 
 178     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
 179         // We've already finished assignment of resources to nodes
 180         return rsc->allocated_to != NULL;
 181     }
 182 
 183     // Sort allowed nodes by score
 184     nodes = sorted_allowed_nodes(rsc);
 185     if (nodes != NULL) {
 186         best = (pcmk_node_t *) nodes->data; // First node has best score
 187     }
 188 
 189     if ((prefer != NULL) && (nodes != NULL)) {
 190         // Get the allowed node version of prefer
 191         chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
 192 
 193         if (chosen == NULL) {
 194             pcmk__rsc_trace(rsc, "Preferred node %s for %s was unknown",
 195                             pcmk__node_name(prefer), rsc->id);
 196 
 197         /* Favor the preferred node as long as its score is at least as good as
 198          * the best allowed node's.
 199          *
 200          * An alternative would be to favor the preferred node even if the best
 201          * node is better, when the best node's score is less than INFINITY.
 202          */
 203         } else if (chosen->weight < best->weight) {
 204             pcmk__rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
 205                             pcmk__node_name(chosen), rsc->id);
 206             chosen = NULL;
 207 
 208         } else if (!pcmk__node_available(chosen, true, false)) {
 209             pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable",
 210                             pcmk__node_name(chosen), rsc->id);
 211             chosen = NULL;
 212 
 213         } else {
 214             pcmk__rsc_trace(rsc,
 215                             "Chose preferred node %s for %s "
 216                             "(ignoring %d candidates)",
 217                             pcmk__node_name(chosen), rsc->id,
 218                             g_list_length(nodes));
 219         }
 220     }
 221 
 222     if ((chosen == NULL) && (best != NULL)) {
 223         /* Either there is no preferred node, or the preferred node is not
 224          * suitable, but another node is allowed to run the resource.
 225          */
 226 
 227         chosen = best;
 228 
 229         if (!pcmk__is_unique_clone(rsc->parent)
 230             && (chosen->weight > 0) // Zero not acceptable
 231             && pcmk__node_available(chosen, false, false)) {
 232             /* If the resource is already running on a node, prefer that node if
 233              * it is just as good as the chosen node.
 234              *
 235              * We don't do this for unique clone instances, because
 236              * pcmk__assign_instances() has already assigned instances to their
 237              * running nodes when appropriate, and if we get here, we don't want
 238              * remaining unassigned instances to prefer a node that's already
 239              * running another instance.
 240              */
 241             pcmk_node_t *running = pcmk__current_node(rsc);
 242 
 243             if (running == NULL) {
 244                 // Nothing to do
 245 
 246             } else if (!pcmk__node_available(running, true, false)) {
 247                 pcmk__rsc_trace(rsc,
 248                                 "Current node for %s (%s) can't run resources",
 249                                 rsc->id, pcmk__node_name(running));
 250 
 251             } else {
 252                 int nodes_with_best_score = 1;
 253 
 254                 for (GList *iter = nodes->next; iter; iter = iter->next) {
 255                     pcmk_node_t *allowed = (pcmk_node_t *) iter->data;
 256 
 257                     if (allowed->weight != chosen->weight) {
 258                         // The nodes are sorted by score, so no more are equal
 259                         break;
 260                     }
 261                     if (pcmk__same_node(allowed, running)) {
 262                         // Scores are equal, so prefer the current node
 263                         chosen = allowed;
 264                     }
 265                     nodes_with_best_score++;
 266                 }
 267 
 268                 if (nodes_with_best_score > 1) {
 269                     uint8_t log_level = LOG_INFO;
 270 
 271                     if (chosen->weight >= PCMK_SCORE_INFINITY) {
 272                         log_level = LOG_WARNING;
 273                     }
 274                     do_crm_log(log_level,
 275                                "Chose %s for %s from %d nodes with score %s",
 276                                pcmk__node_name(chosen), rsc->id,
 277                                nodes_with_best_score,
 278                                pcmk_readable_score(chosen->weight));
 279                 }
 280             }
 281         }
 282 
 283         pcmk__rsc_trace(rsc, "Chose %s for %s from %d candidates",
 284                         pcmk__node_name(chosen), rsc->id, g_list_length(nodes));
 285     }
 286 
 287     pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
 288     g_list_free(nodes);
 289     return rsc->allocated_to != NULL;
 290 }
 291 
 292 /*!
 293  * \internal
 294  * \brief Apply a "this with" colocation to a node's allowed node scores
 295  *
 296  * \param[in,out] colocation  Colocation to apply
 297  * \param[in,out] rsc         Resource being assigned
 298  */
 299 static void
 300 apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 301 {
 302     GHashTable *archive = NULL;
 303     pcmk_resource_t *other = colocation->primary;
 304 
 305     // In certain cases, we will need to revert the node scores
 306     if ((colocation->dependent_role >= pcmk_role_promoted)
 307         || ((colocation->score < 0)
 308             && (colocation->score > -PCMK_SCORE_INFINITY))) {
 309         archive = pcmk__copy_node_table(rsc->allowed_nodes);
 310     }
 311 
 312     if (pcmk_is_set(other->flags, pcmk_rsc_unassigned)) {
 313         pcmk__rsc_trace(rsc,
 314                         "%s: Assigning colocation %s primary %s first"
 315                         "(score=%d role=%s)",
 316                         rsc->id, colocation->id, other->id,
 317                         colocation->score,
 318                         pcmk_role_text(colocation->dependent_role));
 319         other->cmds->assign(other, NULL, true);
 320     }
 321 
 322     // Apply the colocation score to this resource's allowed node scores
 323     rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
 324     if ((archive != NULL)
 325         && !pcmk__any_node_available(rsc->allowed_nodes)) {
 326         pcmk__rsc_info(rsc,
 327                        "%s: Reverting scores from colocation with %s "
 328                        "because no nodes allowed",
 329                        rsc->id, other->id);
 330         g_hash_table_destroy(rsc->allowed_nodes);
 331         rsc->allowed_nodes = archive;
 332         archive = NULL;
 333     }
 334     if (archive != NULL) {
 335         g_hash_table_destroy(archive);
 336     }
 337 }
 338 
 339 /*!
 340  * \internal
 341  * \brief Update a Pacemaker Remote node once its connection has been assigned
 342  *
 343  * \param[in] connection  Connection resource that has been assigned
 344  */
 345 static void
 346 remote_connection_assigned(const pcmk_resource_t *connection)
     /* [previous][next][first][last][top][bottom][index][help] */
 347 {
 348     pcmk_node_t *remote_node = pcmk_find_node(connection->cluster,
 349                                               connection->id);
 350 
 351     CRM_CHECK(remote_node != NULL, return);
 352 
 353     if ((connection->allocated_to != NULL)
 354         && (connection->next_role != pcmk_role_stopped)) {
 355 
 356         crm_trace("Pacemaker Remote node %s will be online",
 357                   remote_node->details->id);
 358         remote_node->details->online = TRUE;
 359         if (remote_node->details->unseen) {
 360             // Avoid unnecessary fence, since we will attempt connection
 361             remote_node->details->unclean = FALSE;
 362         }
 363 
 364     } else {
 365         crm_trace("Pacemaker Remote node %s will be shut down "
 366                   "(%sassigned connection's next role is %s)",
 367                   remote_node->details->id,
 368                   ((connection->allocated_to == NULL)? "un" : ""),
 369                   pcmk_role_text(connection->next_role));
 370         remote_node->details->shutdown = TRUE;
 371     }
 372 }
 373 
 374 /*!
 375  * \internal
 376  * \brief Assign a primitive resource to a node
 377  *
 378  * \param[in,out] rsc           Resource to assign to a node
 379  * \param[in]     prefer        Node to prefer, if all else is equal
 380  * \param[in]     stop_if_fail  If \c true and \p rsc can't be assigned to a
 381  *                              node, set next role to stopped and update
 382  *                              existing actions
 383  *
 384  * \return Node that \p rsc is assigned to, if assigned entirely to one node
 385  *
 386  * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
 387  *       completely undo the assignment. A successful assignment can be either
 388  *       undone or left alone as final. A failed assignment has the same effect
 389  *       as calling pcmk__unassign_resource(); there are no side effects on
 390  *       roles or actions.
 391  */
 392 pcmk_node_t *
 393 pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
     /* [previous][next][first][last][top][bottom][index][help] */
 394                        bool stop_if_fail)
 395 {
 396     GList *this_with_colocations = NULL;
 397     GList *with_this_colocations = NULL;
 398     GList *iter = NULL;
 399     pcmk__colocation_t *colocation = NULL;
 400 
 401     pcmk__assert(pcmk__is_primitive(rsc));
 402 
 403     // Never assign a child without parent being assigned first
 404     if ((rsc->parent != NULL)
 405         && !pcmk_is_set(rsc->parent->flags, pcmk_rsc_assigning)) {
 406         pcmk__rsc_debug(rsc, "%s: Assigning parent %s first",
 407                         rsc->id, rsc->parent->id);
 408         rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail);
 409     }
 410 
 411     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
 412         // Assignment has already been done
 413         const char *node_name = "no node";
 414 
 415         if (rsc->allocated_to != NULL) {
 416             node_name = pcmk__node_name(rsc->allocated_to);
 417         }
 418         pcmk__rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
 419         return rsc->allocated_to;
 420     }
 421 
 422     // Ensure we detect assignment loops
 423     if (pcmk_is_set(rsc->flags, pcmk_rsc_assigning)) {
 424         pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
 425         return NULL;
 426     }
 427     pcmk__set_rsc_flags(rsc, pcmk_rsc_assigning);
 428 
 429     pe__show_node_scores(true, rsc, "Pre-assignment", rsc->allowed_nodes,
 430                          rsc->cluster);
 431 
 432     this_with_colocations = pcmk__this_with_colocations(rsc);
 433     with_this_colocations = pcmk__with_this_colocations(rsc);
 434 
 435     // Apply mandatory colocations first, to satisfy as many as possible
 436     for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
 437         colocation = iter->data;
 438 
 439         if ((colocation->score <= -PCMK_SCORE_INFINITY)
 440             || (colocation->score >= PCMK_SCORE_INFINITY)) {
 441             apply_this_with(colocation, rsc);
 442         }
 443     }
 444     for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
 445         colocation = iter->data;
 446 
 447         if ((colocation->score <= -PCMK_SCORE_INFINITY)
 448             || (colocation->score >= PCMK_SCORE_INFINITY)) {
 449             pcmk__add_dependent_scores(colocation, rsc);
 450         }
 451     }
 452 
 453     pe__show_node_scores(true, rsc, "Mandatory-colocations",
 454                          rsc->allowed_nodes, rsc->cluster);
 455 
 456     // Then apply optional colocations
 457     for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
 458         colocation = iter->data;
 459 
 460         if ((colocation->score > -PCMK_SCORE_INFINITY)
 461             && (colocation->score < PCMK_SCORE_INFINITY)) {
 462             apply_this_with(colocation, rsc);
 463         }
 464     }
 465     for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
 466         colocation = iter->data;
 467 
 468         if ((colocation->score > -PCMK_SCORE_INFINITY)
 469             && (colocation->score < PCMK_SCORE_INFINITY)) {
 470             pcmk__add_dependent_scores(colocation, rsc);
 471         }
 472     }
 473 
 474     g_list_free(this_with_colocations);
 475     g_list_free(with_this_colocations);
 476 
 477     if (rsc->next_role == pcmk_role_stopped) {
 478         pcmk__rsc_trace(rsc,
 479                         "Banning %s from all nodes because it will be stopped",
 480                         rsc->id);
 481         resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
 482                           PCMK_META_TARGET_ROLE, rsc->cluster);
 483 
 484     } else if ((rsc->next_role > rsc->role)
 485                && !pcmk_is_set(rsc->cluster->flags, pcmk_sched_quorate)
 486                && (rsc->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
 487         crm_notice("Resource %s cannot be elevated from %s to %s due to "
 488                    PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE,
 489                    rsc->id, pcmk_role_text(rsc->role),
 490                    pcmk_role_text(rsc->next_role));
 491         pe__set_next_role(rsc, rsc->role,
 492                           PCMK_OPT_NO_QUORUM_POLICY "=" PCMK_VALUE_FREEZE);
 493     }
 494 
 495     pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
 496                                       pcmk_sched_output_scores),
 497                          rsc, __func__, rsc->allowed_nodes, rsc->cluster);
 498 
 499     // Unmanage resource if fencing is enabled but no device is configured
 500     if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
 501         && !pcmk_is_set(rsc->cluster->flags, pcmk_sched_have_fencing)) {
 502         pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
 503     }
 504 
 505     if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 506         // Unmanaged resources stay on their current node
 507         const char *reason = NULL;
 508         pcmk_node_t *assign_to = NULL;
 509 
 510         pe__set_next_role(rsc, rsc->role, "unmanaged");
 511         assign_to = pcmk__current_node(rsc);
 512         if (assign_to == NULL) {
 513             reason = "inactive";
 514         } else if (rsc->role == pcmk_role_promoted) {
 515             reason = "promoted";
 516         } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 517             reason = "failed";
 518         } else {
 519             reason = "active";
 520         }
 521         pcmk__rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
 522                        (assign_to? assign_to->details->uname : "no node"),
 523                        reason);
 524         pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
 525 
 526     } else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_stop_all)) {
 527         // Must stop at some point, but be consistent with stop_if_fail
 528         if (stop_if_fail) {
 529             pcmk__rsc_debug(rsc,
 530                             "Forcing %s to stop: " PCMK_OPT_STOP_ALL_RESOURCES,
 531                             rsc->id);
 532         }
 533         pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
 534 
 535     } else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
 536         // Assignment failed
 537         if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
 538             pcmk__rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
 539         } else if ((rsc->running_on != NULL) && stop_if_fail) {
 540             pcmk__rsc_info(rsc, "Stopping removed resource %s", rsc->id);
 541         }
 542     }
 543 
 544     pcmk__clear_rsc_flags(rsc, pcmk_rsc_assigning);
 545 
 546     if (rsc->is_remote_node) {
 547         remote_connection_assigned(rsc);
 548     }
 549 
 550     return rsc->allocated_to;
 551 }
 552 
 553 /*!
 554  * \internal
 555  * \brief Schedule actions to bring resource down and back to current role
 556  *
 557  * \param[in,out] rsc           Resource to restart
 558  * \param[in,out] current       Node that resource should be brought down on
 559  * \param[in]     need_stop     Whether the resource must be stopped
 560  * \param[in]     need_promote  Whether the resource must be promoted
 561  *
 562  * \return Role that resource would have after scheduled actions are taken
 563  */
 564 static void
 565 schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current,
     /* [previous][next][first][last][top][bottom][index][help] */
 566                          bool need_stop, bool need_promote)
 567 {
 568     enum rsc_role_e role = rsc->role;
 569     enum rsc_role_e next_role;
 570     rsc_transition_fn fn = NULL;
 571 
 572     pcmk__set_rsc_flags(rsc, pcmk_rsc_restarting);
 573 
 574     // Bring resource down to a stop on its current node
 575     while (role != pcmk_role_stopped) {
 576         next_role = rsc_state_matrix[role][pcmk_role_stopped];
 577         pcmk__rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
 578                         (need_stop? "required" : "optional"), rsc->id,
 579                         pcmk_role_text(role), pcmk_role_text(next_role));
 580         fn = rsc_action_matrix[role][next_role];
 581         if (fn == NULL) {
 582             break;
 583         }
 584         fn(rsc, current, !need_stop);
 585         role = next_role;
 586     }
 587 
 588     // Bring resource up to its next role on its next node
 589     while ((rsc->role <= rsc->next_role) && (role != rsc->role)
 590            && !pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
 591         bool required = need_stop;
 592 
 593         next_role = rsc_state_matrix[role][rsc->role];
 594         if ((next_role == pcmk_role_promoted) && need_promote) {
 595             required = true;
 596         }
 597         pcmk__rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
 598                         (required? "required" : "optional"), rsc->id,
 599                         pcmk_role_text(role), pcmk_role_text(next_role));
 600         fn = rsc_action_matrix[role][next_role];
 601         if (fn == NULL) {
 602             break;
 603         }
 604         fn(rsc, rsc->allocated_to, !required);
 605         role = next_role;
 606     }
 607 
 608     pcmk__clear_rsc_flags(rsc, pcmk_rsc_restarting);
 609 }
 610 
 611 /*!
 612  * \internal
 613  * \brief If a resource's next role is not explicitly specified, set a default
 614  *
 615  * \param[in,out] rsc  Resource to set next role for
 616  *
 617  * \return "explicit" if next role was explicitly set, otherwise "implicit"
 618  */
 619 static const char *
 620 set_default_next_role(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 621 {
 622     if (rsc->next_role != pcmk_role_unknown) {
 623         return "explicit";
 624     }
 625 
 626     if (rsc->allocated_to == NULL) {
 627         pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
 628     } else {
 629         pe__set_next_role(rsc, pcmk_role_started, "assignment");
 630     }
 631     return "implicit";
 632 }
 633 
 634 /*!
 635  * \internal
 636  * \brief Create an action to represent an already pending start
 637  *
 638  * \param[in,out] rsc  Resource to create start action for
 639  */
 640 static void
 641 create_pending_start(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 642 {
 643     pcmk_action_t *start = NULL;
 644 
 645     pcmk__rsc_trace(rsc,
 646                     "Creating action for %s to represent already pending start",
 647                     rsc->id);
 648     start = start_action(rsc, rsc->allocated_to, TRUE);
 649     pcmk__set_action_flags(start, pcmk_action_always_in_graph);
 650 }
 651 
 652 /*!
 653  * \internal
 654  * \brief Schedule actions needed to take a resource to its next role
 655  *
 656  * \param[in,out] rsc  Resource to schedule actions for
 657  */
 658 static void
 659 schedule_role_transition_actions(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 660 {
 661     enum rsc_role_e role = rsc->role;
 662 
 663     while (role != rsc->next_role) {
 664         enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
 665         rsc_transition_fn fn = NULL;
 666 
 667         pcmk__rsc_trace(rsc,
 668                         "Creating action to take %s from %s to %s "
 669                         "(ending at %s)",
 670                         rsc->id, pcmk_role_text(role),
 671                         pcmk_role_text(next_role),
 672                         pcmk_role_text(rsc->next_role));
 673         fn = rsc_action_matrix[role][next_role];
 674         if (fn == NULL) {
 675             break;
 676         }
 677         fn(rsc, rsc->allocated_to, false);
 678         role = next_role;
 679     }
 680 }
 681 
 682 /*!
 683  * \internal
 684  * \brief Create all actions needed for a given primitive resource
 685  *
 686  * \param[in,out] rsc  Primitive resource to create actions for
 687  */
 688 void
 689 pcmk__primitive_create_actions(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 690 {
 691     bool need_stop = false;
 692     bool need_promote = false;
 693     bool is_moving = false;
 694     bool allow_migrate = false;
 695     bool multiply_active = false;
 696 
 697     pcmk_node_t *current = NULL;
 698     unsigned int num_all_active = 0;
 699     unsigned int num_clean_active = 0;
 700     const char *next_role_source = NULL;
 701 
 702     pcmk__assert(pcmk__is_primitive(rsc));
 703 
 704     next_role_source = set_default_next_role(rsc);
 705     pcmk__rsc_trace(rsc,
 706                     "Creating all actions for %s transition from %s to %s "
 707                     "(%s) on %s",
 708                     rsc->id, pcmk_role_text(rsc->role),
 709                     pcmk_role_text(rsc->next_role), next_role_source,
 710                     pcmk__node_name(rsc->allocated_to));
 711 
 712     current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);
 713 
 714     g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
 715                    rsc);
 716 
 717     if ((current != NULL) && (rsc->allocated_to != NULL)
 718         && !pcmk__same_node(current, rsc->allocated_to)
 719         && (rsc->next_role >= pcmk_role_started)) {
 720 
 721         pcmk__rsc_trace(rsc, "Moving %s from %s to %s",
 722                         rsc->id, pcmk__node_name(current),
 723                         pcmk__node_name(rsc->allocated_to));
 724         is_moving = true;
 725         allow_migrate = pcmk__rsc_can_migrate(rsc, current);
 726 
 727         // This is needed even if migrating (though I'm not sure why ...)
 728         need_stop = true;
 729     }
 730 
 731     // Check whether resource is partially migrated and/or multiply active
 732     if ((rsc->partial_migration_source != NULL)
 733         && (rsc->partial_migration_target != NULL)
 734         && allow_migrate && (num_all_active == 2)
 735         && pcmk__same_node(current, rsc->partial_migration_source)
 736         && pcmk__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
 737         /* A partial migration is in progress, and the migration target remains
 738          * the same as when the migration began.
 739          */
 740         pcmk__rsc_trace(rsc,
 741                         "Partial migration of %s from %s to %s will continue",
 742                         rsc->id, pcmk__node_name(rsc->partial_migration_source),
 743                         pcmk__node_name(rsc->partial_migration_target));
 744 
 745     } else if ((rsc->partial_migration_source != NULL)
 746                || (rsc->partial_migration_target != NULL)) {
 747         // A partial migration is in progress but can't be continued
 748 
 749         if (num_all_active > 2) {
 750             // The resource is migrating *and* multiply active!
 751             crm_notice("Forcing recovery of %s because it is migrating "
 752                        "from %s to %s and possibly active elsewhere",
 753                        rsc->id, pcmk__node_name(rsc->partial_migration_source),
 754                        pcmk__node_name(rsc->partial_migration_target));
 755         } else {
 756             // The migration source or target isn't available
 757             crm_notice("Forcing recovery of %s because it can no longer "
 758                        "migrate from %s to %s",
 759                        rsc->id, pcmk__node_name(rsc->partial_migration_source),
 760                        pcmk__node_name(rsc->partial_migration_target));
 761         }
 762         need_stop = true;
 763         rsc->partial_migration_source = rsc->partial_migration_target = NULL;
 764         allow_migrate = false;
 765 
 766     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
 767         multiply_active = (num_all_active > 1);
 768     } else {
 769         /* If a resource has PCMK_META_REQUIRES set to PCMK_VALUE_NOTHING or
 770          * PCMK_VALUE_QUORUM, don't consider it active on unclean nodes (similar
 771          * to how all resources behave when PCMK_OPT_STONITH_ENABLED is false).
 772          * We can start such resources elsewhere before fencing completes, and
 773          * if we considered the resource active on the failed node, we would
 774          * attempt recovery for being active on multiple nodes.
 775          */
 776         multiply_active = (num_clean_active > 1);
 777     }
 778 
 779     if (multiply_active) {
 780         const char *class = crm_element_value(rsc->xml, PCMK_XA_CLASS);
 781 
 782         // Resource was (possibly) incorrectly multiply active
 783         pcmk__sched_err("%s resource %s might be active on %u nodes (%s)",
 784                         pcmk__s(class, "Untyped"), rsc->id, num_all_active,
 785                         pcmk__multiply_active_text(rsc->recovery_type));
 786         crm_notice("For more information, see \"What are multiply active "
 787                    "resources?\" at "
 788                    "https://projects.clusterlabs.org/w/clusterlabs/faq/");
 789 
 790         switch (rsc->recovery_type) {
 791             case pcmk_multiply_active_restart:
 792                 need_stop = true;
 793                 break;
 794             case pcmk_multiply_active_unexpected:
 795                 need_stop = true; // stop_resource() will skip expected node
 796                 pcmk__set_rsc_flags(rsc, pcmk_rsc_stop_unexpected);
 797                 break;
 798             default:
 799                 break;
 800         }
 801 
 802     } else {
 803         pcmk__clear_rsc_flags(rsc, pcmk_rsc_stop_unexpected);
 804     }
 805 
 806     if (pcmk_is_set(rsc->flags, pcmk_rsc_start_pending)) {
 807         create_pending_start(rsc);
 808     }
 809 
 810     if (is_moving) {
 811         // Remaining tests are only for resources staying where they are
 812 
 813     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 814         if (pcmk_is_set(rsc->flags, pcmk_rsc_stop_if_failed)) {
 815             need_stop = true;
 816             pcmk__rsc_trace(rsc, "Recovering %s", rsc->id);
 817         } else {
 818             pcmk__rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
 819             if (rsc->next_role == pcmk_role_promoted) {
 820                 need_promote = true;
 821             }
 822         }
 823 
 824     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
 825         pcmk__rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
 826         need_stop = true;
 827 
 828     } else if ((rsc->role > pcmk_role_started) && (current != NULL)
 829                && (rsc->allocated_to != NULL)) {
 830         pcmk_action_t *start = NULL;
 831 
 832         pcmk__rsc_trace(rsc, "Creating start action for promoted resource %s",
 833                         rsc->id);
 834         start = start_action(rsc, rsc->allocated_to, TRUE);
 835         if (!pcmk_is_set(start->flags, pcmk_action_optional)) {
 836             // Recovery of a promoted resource
 837             pcmk__rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
 838             need_stop = true;
 839         }
 840     }
 841 
 842     // Create any actions needed to bring resource down and back up to same role
 843     schedule_restart_actions(rsc, current, need_stop, need_promote);
 844 
 845     // Create any actions needed to take resource from this role to the next
 846     schedule_role_transition_actions(rsc);
 847 
 848     pcmk__create_recurring_actions(rsc);
 849 
 850     if (allow_migrate) {
 851         pcmk__create_migration_actions(rsc, current);
 852     }
 853 }
 854 
 855 /*!
 856  * \internal
 857  * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
 858  *
 859  * \param[in] rsc  Resource to check
 860  */
 861 static void
 862 rsc_avoids_remote_nodes(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 863 {
 864     GHashTableIter iter;
 865     pcmk_node_t *node = NULL;
 866 
 867     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
 868     while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
 869         if (node->details->remote_rsc != NULL) {
 870             node->weight = -PCMK_SCORE_INFINITY;
 871         }
 872     }
 873 }
 874 
 875 /*!
 876  * \internal
 877  * \brief Return allowed nodes as (possibly sorted) list
 878  *
 879  * Convert a resource's hash table of allowed nodes to a list. If printing to
 880  * stdout, sort the list, to keep action ID numbers consistent for regression
 881  * test output (while avoiding the performance hit on a live cluster).
 882  *
 883  * \param[in] rsc       Resource to check for allowed nodes
 884  *
 885  * \return List of resource's allowed nodes
 886  * \note Callers should take care not to rely on the list being sorted.
 887  */
 888 static GList *
 889 allowed_nodes_as_list(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 890 {
 891     GList *allowed_nodes = NULL;
 892 
 893     if (rsc->allowed_nodes) {
 894         allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
 895     }
 896 
 897     if (!pcmk__is_daemon) {
 898         allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
 899     }
 900 
 901     return allowed_nodes;
 902 }
 903 
 904 /*!
 905  * \internal
 906  * \brief Create implicit constraints needed for a primitive resource
 907  *
 908  * \param[in,out] rsc  Primitive resource to create implicit constraints for
 909  */
 910 void
 911 pcmk__primitive_internal_constraints(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 912 {
 913     GList *allowed_nodes = NULL;
 914     bool check_unfencing = false;
 915     bool check_utilization = false;
 916 
 917     pcmk__assert(pcmk__is_primitive(rsc));
 918 
 919     if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
 920         pcmk__rsc_trace(rsc,
 921                         "Skipping implicit constraints for unmanaged resource "
 922                         "%s", rsc->id);
 923         return;
 924     }
 925 
 926     // Whether resource requires unfencing
 927     check_unfencing = !pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
 928                       && pcmk_is_set(rsc->cluster->flags,
 929                                      pcmk_sched_enable_unfencing)
 930                       && pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing);
 931 
 932     // Whether a non-default placement strategy is used
 933     check_utilization = (g_hash_table_size(rsc->utilization) > 0)
 934                          && !pcmk__str_eq(rsc->cluster->placement_strategy,
 935                                           PCMK_VALUE_DEFAULT, pcmk__str_casei);
 936 
 937     // Order stops before starts (i.e. restart)
 938     pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
 939                        rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
 940                        pcmk__ar_ordered
 941                        |pcmk__ar_first_implies_then
 942                        |pcmk__ar_intermediate_stop,
 943                        rsc->cluster);
 944 
 945     // Promotable ordering: demote before stop, start before promote
 946     if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
 947                     pcmk_rsc_promotable)
 948         || (rsc->role > pcmk_role_unpromoted)) {
 949 
 950         pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_DEMOTE, 0),
 951                            NULL,
 952                            rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
 953                            NULL,
 954                            pcmk__ar_promoted_then_implies_first, rsc->cluster);
 955 
 956         pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
 957                            NULL,
 958                            rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
 959                            NULL,
 960                            pcmk__ar_unrunnable_first_blocks, rsc->cluster);
 961     }
 962 
 963     // Don't clear resource history if probing on same node
 964     pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
 965                        NULL, rsc,
 966                        pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
 967                        NULL,
 968                        pcmk__ar_if_on_same_node|pcmk__ar_then_cancels_first,
 969                        rsc->cluster);
 970 
 971     // Certain checks need allowed nodes
 972     if (check_unfencing || check_utilization || (rsc->container != NULL)) {
 973         allowed_nodes = allowed_nodes_as_list(rsc);
 974     }
 975 
 976     if (check_unfencing) {
 977         g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
 978     }
 979 
 980     if (check_utilization) {
 981         pcmk__create_utilization_constraints(rsc, allowed_nodes);
 982     }
 983 
 984     if (rsc->container != NULL) {
 985         pcmk_resource_t *remote_rsc = NULL;
 986 
 987         if (rsc->is_remote_node) {
 988             // rsc is the implicit remote connection for a guest or bundle node
 989 
 990             /* Guest resources are not allowed to run on Pacemaker Remote nodes,
 991              * to avoid nesting remotes. However, bundles are allowed.
 992              */
 993             if (!pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
 994                 rsc_avoids_remote_nodes(rsc->container);
 995             }
 996 
 997             /* If someone cleans up a guest or bundle node's container, we will
 998              * likely schedule a (re-)probe of the container and recovery of the
 999              * connection. Order the connection stop after the container probe,
1000              * so that if we detect the container running, we will trigger a new
1001              * transition and avoid the unnecessary recovery.
1002              */
1003             pcmk__order_resource_actions(rsc->container, PCMK_ACTION_MONITOR,
1004                                          rsc, PCMK_ACTION_STOP,
1005                                          pcmk__ar_ordered);
1006 
1007         /* A user can specify that a resource must start on a Pacemaker Remote
1008          * node by explicitly configuring it with the container=NODENAME
1009          * meta-attribute. This is of questionable merit, since location
1010          * constraints can accomplish the same thing. But we support it, so here
1011          * we check whether a resource (that is not itself a remote connection)
1012          * has container set to a remote node or guest node resource.
1013          */
1014         } else if (rsc->container->is_remote_node) {
1015             remote_rsc = rsc->container;
1016         } else  {
1017             remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
1018                                                           rsc->container);
1019         }
1020 
1021         if (remote_rsc != NULL) {
1022             /* Force the resource on the Pacemaker Remote node instead of
1023              * colocating the resource with the container resource.
1024              */
1025             for (GList *item = allowed_nodes; item; item = item->next) {
1026                 pcmk_node_t *node = item->data;
1027 
1028                 if (node->details->remote_rsc != remote_rsc) {
1029                     node->weight = -PCMK_SCORE_INFINITY;
1030                 }
1031             }
1032 
1033         } else {
1034             /* This resource is either a filler for a container that does NOT
1035              * represent a Pacemaker Remote node, or a Pacemaker Remote
1036              * connection resource for a guest node or bundle.
1037              */
1038             int score;
1039 
1040             crm_trace("Order and colocate %s relative to its container %s",
1041                       rsc->id, rsc->container->id);
1042 
1043             pcmk__new_ordering(rsc->container,
1044                                pcmk__op_key(rsc->container->id,
1045                                             PCMK_ACTION_START, 0),
1046                                NULL, rsc,
1047                                pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
1048                                NULL,
1049                                pcmk__ar_first_implies_then
1050                                |pcmk__ar_unrunnable_first_blocks,
1051                                rsc->cluster);
1052 
1053             pcmk__new_ordering(rsc,
1054                                pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
1055                                NULL,
1056                                rsc->container,
1057                                pcmk__op_key(rsc->container->id,
1058                                             PCMK_ACTION_STOP, 0),
1059                                NULL, pcmk__ar_then_implies_first, rsc->cluster);
1060 
1061             if (pcmk_is_set(rsc->flags, pcmk_rsc_remote_nesting_allowed)) {
1062                 score = 10000;    /* Highly preferred but not essential */
1063             } else {
1064                 score = PCMK_SCORE_INFINITY; // Force to run on same host
1065             }
1066             pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
1067                                  rsc->container, NULL, NULL,
1068                                  pcmk__coloc_influence);
1069         }
1070     }
1071 
1072     if (rsc->is_remote_node
1073         || pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
1074         /* Remote connections and fencing devices are not allowed to run on
1075          * Pacemaker Remote nodes
1076          */
1077         rsc_avoids_remote_nodes(rsc);
1078     }
1079     g_list_free(allowed_nodes);
1080 }
1081 
1082 /*!
1083  * \internal
1084  * \brief Apply a colocation's score to node scores or resource priority
1085  *
1086  * Given a colocation constraint, apply its score to the dependent's
1087  * allowed node scores (if we are still placing resources) or priority (if
1088  * we are choosing promotable clone instance roles).
1089  *
1090  * \param[in,out] dependent      Dependent resource in colocation
1091  * \param[in]     primary        Primary resource in colocation
1092  * \param[in]     colocation     Colocation constraint to apply
1093  * \param[in]     for_dependent  true if called on behalf of dependent
1094  *
1095  * \return The score added to the dependent's priority
1096  */
1097 int
1098 pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent,
     /* [previous][next][first][last][top][bottom][index][help] */
1099                                   const pcmk_resource_t *primary,
1100                                   const pcmk__colocation_t *colocation,
1101                                   bool for_dependent)
1102 {
1103     enum pcmk__coloc_affects filter_results;
1104 
1105     pcmk__assert((dependent != NULL) && (primary != NULL)
1106                  && (colocation != NULL));
1107 
1108     if (for_dependent) {
1109         // Always process on behalf of primary resource
1110         return primary->cmds->apply_coloc_score(dependent, primary, colocation,
1111                                                 false);
1112     }
1113 
1114     filter_results = pcmk__colocation_affects(dependent, primary, colocation,
1115                                               false);
1116     pcmk__rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
1117                     ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
1118                     dependent->id, primary->id, colocation->id,
1119                     colocation->score,
1120                     filter_results);
1121 
1122     switch (filter_results) {
1123         case pcmk__coloc_affects_role:
1124             return pcmk__apply_coloc_to_priority(dependent, primary,
1125                                                  colocation);
1126 
1127         case pcmk__coloc_affects_location:
1128             pcmk__apply_coloc_to_scores(dependent, primary, colocation);
1129             return 0;
1130 
1131         default: // pcmk__coloc_affects_nothing
1132             return 0;
1133     }
1134 }
1135 
1136 /* Primitive implementation of
1137  * pcmk_assignment_methods_t:with_this_colocations()
1138  */
1139 void
1140 pcmk__with_primitive_colocations(const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1141                                  const pcmk_resource_t *orig_rsc, GList **list)
1142 {
1143     pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
1144 
1145     if (rsc == orig_rsc) {
1146         /* For the resource itself, add all of its own colocations and relevant
1147          * colocations from its parent (if any).
1148          */
1149         pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
1150         if (rsc->parent != NULL) {
1151             rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, list);
1152         }
1153     } else {
1154         // For an ancestor, add only explicitly configured constraints
1155         for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) {
1156             pcmk__colocation_t *colocation = iter->data;
1157 
1158             if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1159                 pcmk__add_with_this(list, colocation, orig_rsc);
1160             }
1161         }
1162     }
1163 }
1164 
1165 /* Primitive implementation of
1166  * pcmk_assignment_methods_t:this_with_colocations()
1167  */
1168 void
1169 pcmk__primitive_with_colocations(const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1170                                  const pcmk_resource_t *orig_rsc, GList **list)
1171 {
1172     pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
1173 
1174     if (rsc == orig_rsc) {
1175         /* For the resource itself, add all of its own colocations and relevant
1176          * colocations from its parent (if any).
1177          */
1178         pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
1179         if (rsc->parent != NULL) {
1180             rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, list);
1181         }
1182     } else {
1183         // For an ancestor, add only explicitly configured constraints
1184         for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) {
1185             pcmk__colocation_t *colocation = iter->data;
1186 
1187             if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1188                 pcmk__add_this_with(list, colocation, orig_rsc);
1189             }
1190         }
1191     }
1192 }
1193 
1194 /*!
1195  * \internal
1196  * \brief Return action flags for a given primitive resource action
1197  *
1198  * \param[in,out] action  Action to get flags for
1199  * \param[in]     node    If not NULL, limit effects to this node (ignored)
1200  *
1201  * \return Flags appropriate to \p action on \p node
1202  */
1203 uint32_t
1204 pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
1205 {
1206     pcmk__assert(action != NULL);
1207     return (uint32_t) action->flags;
1208 }
1209 
1210 /*!
1211  * \internal
1212  * \brief Check whether a node is a multiply active resource's expected node
1213  *
1214  * \param[in] rsc  Resource to check
1215  * \param[in] node  Node to check
1216  *
1217  * \return \c true if \p rsc is multiply active with
1218  *         \c PCMK_META_MULTIPLE_ACTIVE set to \c PCMK_VALUE_STOP_UNEXPECTED,
1219  *         and \p node is the node where it will remain active
1220  * \note This assumes that the resource's next role cannot be changed to stopped
1221  *       after this is called, which should be reasonable if status has already
1222  *       been unpacked and resources have been assigned to nodes.
1223  */
1224 static bool
1225 is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
1226 {
1227     return pcmk_all_flags_set(rsc->flags,
1228                               pcmk_rsc_stop_unexpected|pcmk_rsc_restarting)
1229            && (rsc->next_role > pcmk_role_stopped)
1230            && pcmk__same_node(rsc->allocated_to, node);
1231 }
1232 
1233 /*!
1234  * \internal
1235  * \brief Schedule actions needed to stop a resource wherever it is active
1236  *
1237  * \param[in,out] rsc       Resource being stopped
1238  * \param[in]     node      Node where resource is being stopped (ignored)
1239  * \param[in]     optional  Whether actions should be optional
1240  */
1241 static void
1242 stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1243 {
1244     for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
1245         pcmk_node_t *current = (pcmk_node_t *) iter->data;
1246         pcmk_action_t *stop = NULL;
1247 
1248         if (is_expected_node(rsc, current)) {
1249             /* We are scheduling restart actions for a multiply active resource
1250              * with PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_STOP_UNEXPECTED, and
1251              * this is where it should not be stopped.
1252              */
1253             pcmk__rsc_trace(rsc,
1254                             "Skipping stop of multiply active resource %s "
1255                             "on expected node %s",
1256                             rsc->id, pcmk__node_name(current));
1257             continue;
1258         }
1259 
1260         if (rsc->partial_migration_target != NULL) {
1261             // Continue migration if node originally was and remains target
1262             if (pcmk__same_node(current, rsc->partial_migration_target)
1263                 && pcmk__same_node(current, rsc->allocated_to)) {
1264                 pcmk__rsc_trace(rsc,
1265                                 "Skipping stop of %s on %s "
1266                                 "because partial migration there will continue",
1267                                 rsc->id, pcmk__node_name(current));
1268                 continue;
1269             } else {
1270                 pcmk__rsc_trace(rsc,
1271                                 "Forcing stop of %s on %s "
1272                                 "because migration target changed",
1273                                 rsc->id, pcmk__node_name(current));
1274                 optional = false;
1275             }
1276         }
1277 
1278         pcmk__rsc_trace(rsc, "Scheduling stop of %s on %s",
1279                         rsc->id, pcmk__node_name(current));
1280         stop = stop_action(rsc, current, optional);
1281 
1282         if (rsc->allocated_to == NULL) {
1283             pe_action_set_reason(stop, "node availability", true);
1284         } else if (pcmk_all_flags_set(rsc->flags, pcmk_rsc_restarting
1285                                                   |pcmk_rsc_stop_unexpected)) {
1286             /* We are stopping a multiply active resource on a node that is
1287              * not its expected node, and we are still scheduling restart
1288              * actions, so the stop is for being multiply active.
1289              */
1290             pe_action_set_reason(stop, "being multiply active", true);
1291         }
1292 
1293         if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
1294             pcmk__clear_action_flags(stop, pcmk_action_runnable);
1295         }
1296 
1297         if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_remove_after_stop)) {
1298             pcmk__schedule_cleanup(rsc, current, optional);
1299         }
1300 
1301         if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_unfencing)) {
1302             pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
1303                                                  NULL, false, rsc->cluster);
1304 
1305             order_actions(stop, unfence, pcmk__ar_then_implies_first);
1306             if (!pcmk__node_unfenced(current)) {
1307                 pcmk__sched_err("Stopping %s until %s can be unfenced",
1308                                 rsc->id, pcmk__node_name(current));
1309             }
1310         }
1311     }
1312 }
1313 
1314 /*!
1315  * \internal
1316  * \brief Schedule actions needed to start a resource on a node
1317  *
1318  * \param[in,out] rsc       Resource being started
1319  * \param[in,out] node      Node where resource should be started
1320  * \param[in]     optional  Whether actions should be optional
1321  */
1322 static void
1323 start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1324 {
1325     pcmk_action_t *start = NULL;
1326 
1327     pcmk__assert(node != NULL);
1328 
1329     pcmk__rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
1330                     (optional? "optional" : "required"), rsc->id,
1331                     pcmk__node_name(node), node->weight);
1332     start = start_action(rsc, node, TRUE);
1333 
1334     pcmk__order_vs_unfence(rsc, node, start, pcmk__ar_first_implies_then);
1335 
1336     if (pcmk_is_set(start->flags, pcmk_action_runnable) && !optional) {
1337         pcmk__clear_action_flags(start, pcmk_action_optional);
1338     }
1339 
1340     if (is_expected_node(rsc, node)) {
1341         /* This could be a problem if the start becomes necessary for other
1342          * reasons later.
1343          */
1344         pcmk__rsc_trace(rsc,
1345                         "Start of multiply active resouce %s "
1346                         "on expected node %s will be a pseudo-action",
1347                         rsc->id, pcmk__node_name(node));
1348         pcmk__set_action_flags(start, pcmk_action_pseudo);
1349     }
1350 }
1351 
1352 /*!
1353  * \internal
1354  * \brief Schedule actions needed to promote a resource on a node
1355  *
1356  * \param[in,out] rsc       Resource being promoted
1357  * \param[in]     node      Node where resource should be promoted
1358  * \param[in]     optional  Whether actions should be optional
1359  */
1360 static void
1361 promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1362 {
1363     GList *iter = NULL;
1364     GList *action_list = NULL;
1365     bool runnable = true;
1366 
1367     pcmk__assert(node != NULL);
1368 
1369     // Any start must be runnable for promotion to be runnable
1370     action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
1371     for (iter = action_list; iter != NULL; iter = iter->next) {
1372         pcmk_action_t *start = (pcmk_action_t *) iter->data;
1373 
1374         if (!pcmk_is_set(start->flags, pcmk_action_runnable)) {
1375             runnable = false;
1376         }
1377     }
1378     g_list_free(action_list);
1379 
1380     if (runnable) {
1381         pcmk_action_t *promote = promote_action(rsc, node, optional);
1382 
1383         pcmk__rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
1384                         (optional? "optional" : "required"), rsc->id,
1385                         pcmk__node_name(node));
1386 
1387         if (is_expected_node(rsc, node)) {
1388             /* This could be a problem if the promote becomes necessary for
1389              * other reasons later.
1390              */
1391             pcmk__rsc_trace(rsc,
1392                             "Promotion of multiply active resouce %s "
1393                             "on expected node %s will be a pseudo-action",
1394                             rsc->id, pcmk__node_name(node));
1395             pcmk__set_action_flags(promote, pcmk_action_pseudo);
1396         }
1397     } else {
1398         pcmk__rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
1399                         rsc->id, pcmk__node_name(node));
1400         action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
1401                                            true);
1402         for (iter = action_list; iter != NULL; iter = iter->next) {
1403             pcmk_action_t *promote = (pcmk_action_t *) iter->data;
1404 
1405             pcmk__clear_action_flags(promote, pcmk_action_runnable);
1406         }
1407         g_list_free(action_list);
1408     }
1409 }
1410 
1411 /*!
1412  * \internal
1413  * \brief Schedule actions needed to demote a resource wherever it is active
1414  *
1415  * \param[in,out] rsc       Resource being demoted
1416  * \param[in]     node      Node where resource should be demoted (ignored)
1417  * \param[in]     optional  Whether actions should be optional
1418  */
1419 static void
1420 demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1421 {
1422     /* Since this will only be called for a primitive (possibly as an instance
1423      * of a collective resource), the resource is multiply active if it is
1424      * running on more than one node, so we want to demote on all of them as
1425      * part of recovery, regardless of which one is the desired node.
1426      */
1427     for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
1428         pcmk_node_t *current = (pcmk_node_t *) iter->data;
1429 
1430         if (is_expected_node(rsc, current)) {
1431             pcmk__rsc_trace(rsc,
1432                             "Skipping demote of multiply active resource %s "
1433                             "on expected node %s",
1434                             rsc->id, pcmk__node_name(current));
1435         } else {
1436             pcmk__rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
1437                             (optional? "optional" : "required"), rsc->id,
1438                             pcmk__node_name(current));
1439             demote_action(rsc, current, optional);
1440         }
1441     }
1442 }
1443 
1444 static void
1445 assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
     /* [previous][next][first][last][top][bottom][index][help] */
1446 {
1447     pcmk__assert(false);
1448 }
1449 
1450 /*!
1451  * \internal
1452  * \brief Schedule cleanup of a resource
1453  *
1454  * \param[in,out] rsc       Resource to clean up
1455  * \param[in]     node      Node to clean up on
1456  * \param[in]     optional  Whether clean-up should be optional
1457  */
1458 void
1459 pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
1460                        bool optional)
1461 {
1462     /* If the cleanup is required, its orderings are optional, because they're
1463      * relevant only if both actions are required. Conversely, if the cleanup is
1464      * optional, the orderings make the then action required if the first action
1465      * becomes required.
1466      */
1467     uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
1468 
1469     CRM_CHECK((rsc != NULL) && (node != NULL), return);
1470 
1471     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1472         pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
1473                         rsc->id, pcmk__node_name(node));
1474         return;
1475     }
1476 
1477     if (node->details->unclean || !node->details->online) {
1478         pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
1479                         rsc->id, pcmk__node_name(node));
1480         return;
1481     }
1482 
1483     crm_notice("Scheduling clean-up of %s on %s",
1484                rsc->id, pcmk__node_name(node));
1485     delete_action(rsc, node, optional);
1486 
1487     // stop -> clean-up -> start
1488     pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP,
1489                                  rsc, PCMK_ACTION_DELETE, flag);
1490     pcmk__order_resource_actions(rsc, PCMK_ACTION_DELETE,
1491                                  rsc, PCMK_ACTION_START, flag);
1492 }
1493 
1494 /*!
1495  * \internal
1496  * \brief Add primitive meta-attributes relevant to graph actions to XML
1497  *
1498  * \param[in]     rsc  Primitive resource whose meta-attributes should be added
1499  * \param[in,out] xml  Transition graph action attributes XML to add to
1500  */
1501 void
1502 pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1503 {
1504     char *name = NULL;
1505     char *value = NULL;
1506     const pcmk_resource_t *parent = NULL;
1507 
1508     pcmk__assert(pcmk__is_primitive(rsc) && (xml != NULL));
1509 
1510     /* Clone instance numbers get set internally as meta-attributes, and are
1511      * needed in the transition graph (for example, to tell unique clone
1512      * instances apart).
1513      */
1514     value = g_hash_table_lookup(rsc->meta, PCMK__META_CLONE);
1515     if (value != NULL) {
1516         name = crm_meta_name(PCMK__META_CLONE);
1517         crm_xml_add(xml, name, value);
1518         free(name);
1519     }
1520 
1521     // Not sure if this one is really needed ...
1522     value = g_hash_table_lookup(rsc->meta, PCMK_META_REMOTE_NODE);
1523     if (value != NULL) {
1524         name = crm_meta_name(PCMK_META_REMOTE_NODE);
1525         crm_xml_add(xml, name, value);
1526         free(name);
1527     }
1528 
1529     /* The container meta-attribute can be set on the primitive itself or one of
1530      * its parents (for example, a group inside a container resource), so check
1531      * them all, and keep the highest one found.
1532      */
1533     for (parent = rsc; parent != NULL; parent = parent->parent) {
1534         if (parent->container != NULL) {
1535             crm_xml_add(xml, CRM_META "_" PCMK__META_CONTAINER,
1536                         parent->container->id);
1537         }
1538     }
1539 
1540     /* Bundle replica children will get their external-ip set internally as a
1541      * meta-attribute. The graph action needs it, but under a different naming
1542      * convention than other meta-attributes.
1543      */
1544     value = g_hash_table_lookup(rsc->meta, "external-ip");
1545     if (value != NULL) {
1546         crm_xml_add(xml, "pcmk_external_ip", value);
1547     }
1548 }
1549 
1550 // Primitive implementation of pcmk_assignment_methods_t:add_utilization()
1551 void
1552 pcmk__primitive_add_utilization(const pcmk_resource_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1553                                 const pcmk_resource_t *orig_rsc,
1554                                 GList *all_rscs, GHashTable *utilization)
1555 {
1556     pcmk__assert(pcmk__is_primitive(rsc) && (orig_rsc != NULL)
1557                  && (utilization != NULL));
1558 
1559     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
1560         return;
1561     }
1562 
1563     pcmk__rsc_trace(orig_rsc,
1564                     "%s: Adding primitive %s as colocated utilization",
1565                     orig_rsc->id, rsc->id);
1566     pcmk__release_node_capacity(utilization, rsc);
1567 }
1568 
1569 /*!
1570  * \internal
1571  * \brief Get epoch time of node's shutdown attribute (or now if none)
1572  *
1573  * \param[in,out] node  Node to check
1574  *
1575  * \return Epoch time corresponding to shutdown attribute if set or now if not
1576  */
1577 static time_t
1578 shutdown_time(pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
1579 {
1580     const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL,
1581                                            pcmk__rsc_node_current);
1582     time_t result = 0;
1583 
1584     if (shutdown != NULL) {
1585         long long result_ll;
1586         int rc = pcmk__scan_ll(shutdown, &result_ll, 0LL);
1587 
1588         if (rc == pcmk_rc_ok) {
1589             result = (time_t) result_ll;
1590         } else {
1591             crm_warn("Ignoring invalid value '%s' for %s "
1592                      PCMK__NODE_ATTR_SHUTDOWN " attribute: %s",
1593                      shutdown, pcmk__node_name(node), pcmk_rc_str(rc));
1594         }
1595     }
1596     return (result == 0)? get_effective_time(node->details->data_set) : result;
1597 }
1598 
1599 /*!
1600  * \internal
1601  * \brief Ban a resource from a node if it's not locked to the node
1602  *
1603  * \param[in]     data       Node to check
1604  * \param[in,out] user_data  Resource to check
1605  */
1606 static void
1607 ban_if_not_locked(gpointer data, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1608 {
1609     const pcmk_node_t *node = (const pcmk_node_t *) data;
1610     pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
1611 
1612     if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
1613         resource_location(rsc, node, -PCMK_SCORE_INFINITY,
1614                           PCMK_OPT_SHUTDOWN_LOCK, rsc->cluster);
1615     }
1616 }
1617 
1618 // Primitive implementation of pcmk_assignment_methods_t:shutdown_lock()
1619 void
1620 pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1621 {
1622 
1623     pcmk__assert(pcmk__is_primitive(rsc));
1624 
1625     // Fence devices and remote connections can't be locked
1626     if (pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)
1627         || rsc->is_remote_node) {
1628         return;
1629     }
1630 
1631     if (rsc->lock_node != NULL) {
1632         // The lock was obtained from resource history
1633 
1634         if (rsc->running_on != NULL) {
1635             /* The resource was started elsewhere even though it is now
1636              * considered locked. This shouldn't be possible, but as a
1637              * failsafe, we don't want to disturb the resource now.
1638              */
1639             pcmk__rsc_info(rsc,
1640                            "Cancelling shutdown lock "
1641                            "because %s is already active", rsc->id);
1642             pe__clear_resource_history(rsc, rsc->lock_node);
1643             rsc->lock_node = NULL;
1644             rsc->lock_time = 0;
1645         }
1646 
1647     // Only a resource active on exactly one node can be locked
1648     } else if (pcmk__list_of_1(rsc->running_on)) {
1649         pcmk_node_t *node = rsc->running_on->data;
1650 
1651         if (node->details->shutdown) {
1652             if (node->details->unclean) {
1653                 pcmk__rsc_debug(rsc,
1654                                 "Not locking %s to unclean %s for shutdown",
1655                                 rsc->id, pcmk__node_name(node));
1656             } else {
1657                 rsc->lock_node = node;
1658                 rsc->lock_time = shutdown_time(node);
1659             }
1660         }
1661     }
1662 
1663     if (rsc->lock_node == NULL) {
1664         // No lock needed
1665         return;
1666     }
1667 
1668     if (rsc->cluster->shutdown_lock > 0) {
1669         time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
1670 
1671         pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
1672                        rsc->id, pcmk__node_name(rsc->lock_node),
1673                        (long long) lock_expiration);
1674         pe__update_recheck_time(++lock_expiration, rsc->cluster,
1675                                 "shutdown lock expiration");
1676     } else {
1677         pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown",
1678                        rsc->id, pcmk__node_name(rsc->lock_node));
1679     }
1680 
1681     // If resource is locked to one node, ban it from all other nodes
1682     g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
1683 }

/* [previous][next][first][last][top][bottom][index][help] */