root/lib/pacemaker/pcmk_sched_primitive.c


DEFINITIONS

This source file includes the following definitions:
  1. sorted_allowed_nodes
  2. assign_best_node
  3. apply_this_with
  4. remote_connection_assigned
  5. pcmk__primitive_assign
  6. schedule_restart_actions
  7. set_default_next_role
  8. create_pending_start
  9. schedule_role_transition_actions
  10. pcmk__primitive_create_actions
  11. rsc_avoids_remote_nodes
  12. allowed_nodes_as_list
  13. pcmk__primitive_internal_constraints
  14. pcmk__primitive_apply_coloc_score
  15. pcmk__with_primitive_colocations
  16. pcmk__primitive_with_colocations
  17. pcmk__primitive_action_flags
  18. is_expected_node
  19. stop_resource
  20. start_resource
  21. promote_resource
  22. demote_resource
  23. assert_role_error
  24. pcmk__schedule_cleanup
  25. pcmk__primitive_add_graph_meta
  26. pcmk__primitive_add_utilization
  27. shutdown_time
  28. ban_if_not_locked
  29. pcmk__primitive_shutdown_lock

/*
 * Copyright 2004-2023 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include <stdbool.h>

#include <crm/msg_xml.h>
#include <pacemaker-internal.h>

#include "libpacemaker_private.h"

static void stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional);
static void promote_resource(pe_resource_t *rsc, pe_node_t *node,
                             bool optional);
static void assert_role_error(pe_resource_t *rsc, pe_node_t *node,
                              bool optional);

static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
    /* This array lists the immediate next role when transitioning from one role
     * to a target role. For example, when going from Stopped to Promoted, the
     * next role is Unpromoted, because the resource must be started before it
     * can be promoted. The current state then becomes Started, which is fed
     * into this array again, giving a next role of Promoted.
     *
     * Current role       Immediate next role   Final target role
     * ------------       -------------------   -----------------
     */
    /* Unknown */       { RSC_ROLE_UNKNOWN,     /* Unknown */
                          RSC_ROLE_STOPPED,     /* Stopped */
                          RSC_ROLE_STOPPED,     /* Started */
                          RSC_ROLE_STOPPED,     /* Unpromoted */
                          RSC_ROLE_STOPPED,     /* Promoted */
                        },
    /* Stopped */       { RSC_ROLE_STOPPED,     /* Unknown */
                          RSC_ROLE_STOPPED,     /* Stopped */
                          RSC_ROLE_STARTED,     /* Started */
                          RSC_ROLE_UNPROMOTED,  /* Unpromoted */
                          RSC_ROLE_UNPROMOTED,  /* Promoted */
                        },
    /* Started */       { RSC_ROLE_STOPPED,     /* Unknown */
                          RSC_ROLE_STOPPED,     /* Stopped */
                          RSC_ROLE_STARTED,     /* Started */
                          RSC_ROLE_UNPROMOTED,  /* Unpromoted */
                          RSC_ROLE_PROMOTED,    /* Promoted */
                        },
    /* Unpromoted */    { RSC_ROLE_STOPPED,     /* Unknown */
                          RSC_ROLE_STOPPED,     /* Stopped */
                          RSC_ROLE_STOPPED,     /* Started */
                          RSC_ROLE_UNPROMOTED,  /* Unpromoted */
                          RSC_ROLE_PROMOTED,    /* Promoted */
                        },
    /* Promoted  */     { RSC_ROLE_STOPPED,     /* Unknown */
                          RSC_ROLE_UNPROMOTED,  /* Stopped */
                          RSC_ROLE_UNPROMOTED,  /* Started */
                          RSC_ROLE_UNPROMOTED,  /* Unpromoted */
                          RSC_ROLE_PROMOTED,    /* Promoted */
                        },
};
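
/* Illustrative sketch (editor's addition, not from the upstream file):
 * repeated lookups in rsc_state_matrix walk a resource through each
 * intermediate role. Starting at Stopped with target Promoted yields
 * Unpromoted, then Promoted. The guard macro is hypothetical and never
 * defined, so this example does not affect compilation.
 */
#ifdef PCMK__EXAMPLE_ROLE_WALK
static enum rsc_role_e
example_walk_to_role(enum rsc_role_e current, enum rsc_role_e target)
{
    // Assumes target != RSC_ROLE_UNKNOWN (Unknown is never a goal role)
    while (current != target) {
        current = rsc_state_matrix[current][target]; // one step toward target
    }
    return current;
}
#endif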

/*!
 * \internal
 * \brief Function type for scheduling actions needed for a role change
 *
 * \param[in,out] rsc       Resource whose role is changing
 * \param[in,out] node      Node where resource will be in its next role
 * \param[in]     optional  Whether scheduled actions should be optional
 */
typedef void (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *node,
                                  bool optional);

static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
    /* This array lists the function needed to transition directly from one role
     * to another. NULL indicates that nothing is needed.
     *
     * Current role         Transition function             Next role
     * ------------         -------------------             ----------
     */
    /* Unknown */       {   assert_role_error,              /* Unknown */
                            stop_resource,                  /* Stopped */
                            assert_role_error,              /* Started */
                            assert_role_error,              /* Unpromoted */
                            assert_role_error,              /* Promoted */
                        },
    /* Stopped */       {   assert_role_error,              /* Unknown */
                            NULL,                           /* Stopped */
                            start_resource,                 /* Started */
                            start_resource,                 /* Unpromoted */
                            assert_role_error,              /* Promoted */
                        },
    /* Started */       {   assert_role_error,              /* Unknown */
                            stop_resource,                  /* Stopped */
                            NULL,                           /* Started */
                            NULL,                           /* Unpromoted */
                            promote_resource,               /* Promoted */
                        },
    /* Unpromoted */    {   assert_role_error,              /* Unknown */
                            stop_resource,                  /* Stopped */
                            stop_resource,                  /* Started */
                            NULL,                           /* Unpromoted */
                            promote_resource,               /* Promoted */
                        },
    /* Promoted  */     {   assert_role_error,              /* Unknown */
                            demote_resource,                /* Stopped */
                            demote_resource,                /* Started */
                            demote_resource,                /* Unpromoted */
                            NULL,                           /* Promoted */
                        },
};
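
/* Illustrative sketch (editor's addition, not from the upstream file):
 * combining the two matrices performs one transition step, as
 * schedule_role_transition_actions() below does in a loop. The guard macro
 * is hypothetical and never defined, so this example does not affect
 * compilation.
 */
#ifdef PCMK__EXAMPLE_ROLE_WALK
static enum rsc_role_e
example_transition_step(pe_resource_t *rsc, pe_node_t *node,
                        enum rsc_role_e role, enum rsc_role_e target)
{
    enum rsc_role_e next_role = rsc_state_matrix[role][target];
    rsc_transition_fn fn = rsc_action_matrix[role][next_role];

    if (fn != NULL) { // NULL means no action is needed for this step
        fn(rsc, node, false); // schedule a required (non-optional) action
    }
    return next_role;
}
#endif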

/*!
 * \internal
 * \brief Get a list of a resource's allowed nodes sorted by node weight
 *
 * \param[in] rsc  Resource to check
 *
 * \return List of allowed nodes sorted by node weight
 */
static GList *
sorted_allowed_nodes(const pe_resource_t *rsc)
{
    if (rsc->allowed_nodes != NULL) {
        GList *nodes = g_hash_table_get_values(rsc->allowed_nodes);

        if (nodes != NULL) {
            return pcmk__sort_nodes(nodes, pe__current_node(rsc));
        }
    }
    return NULL;
}

/*!
 * \internal
 * \brief Assign a resource to its best allowed node, if possible
 *
 * \param[in,out] rsc     Resource to choose a node for
 * \param[in]     prefer  If not NULL, prefer this node when all else equal
 *
 * \return true if \p rsc could be assigned to a node, otherwise false
 */
static bool
assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer)
{
    GList *nodes = NULL;
    pe_node_t *chosen = NULL;
    pe_node_t *best = NULL;
    bool result = false;
    const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);

    if (prefer == NULL) {
        prefer = most_free_node;
    }

    if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
        // We've already finished assignment of resources to nodes
        return rsc->allocated_to != NULL;
    }

    // Sort allowed nodes by weight
    nodes = sorted_allowed_nodes(rsc);
    if (nodes != NULL) {
        best = (pe_node_t *) nodes->data; // First node has best score
    }

    if ((prefer != NULL) && (nodes != NULL)) {
        // Get the allowed node version of prefer
        chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);

        if (chosen == NULL) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
                         pe__node_name(prefer), rsc->id);

        /* Favor the preferred node as long as its weight is at least as good as
         * the best allowed node's.
         *
         * An alternative would be to favor the preferred node even if the best
         * node is better, when the best node's weight is less than INFINITY.
         */
        } else if (chosen->weight < best->weight) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
                         pe__node_name(chosen), rsc->id);
            chosen = NULL;

        } else if (!pcmk__node_available(chosen, true, false)) {
            pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
                         pe__node_name(chosen), rsc->id);
            chosen = NULL;

        } else {
            pe_rsc_trace(rsc,
                         "Chose preferred node %s for %s (ignoring %d candidates)",
                         pe__node_name(chosen), rsc->id, g_list_length(nodes));
        }
    }

    if ((chosen == NULL) && (best != NULL)) {
        /* Either there is no preferred node, or the preferred node is not
         * suitable, but another node is allowed to run the resource.
         */

        chosen = best;

        if (!pe_rsc_is_unique_clone(rsc->parent)
            && (chosen->weight > 0) // Zero not acceptable
            && pcmk__node_available(chosen, false, false)) {
            /* If the resource is already running on a node, prefer that node if
             * it is just as good as the chosen node.
             *
             * We don't do this for unique clone instances, because
             * pcmk__assign_instances() has already assigned instances to their
             * running nodes when appropriate, and if we get here, we don't want
             * remaining unassigned instances to prefer a node that's already
             * running another instance.
             */
            pe_node_t *running = pe__current_node(rsc);

            if (running == NULL) {
                // Nothing to do

            } else if (!pcmk__node_available(running, true, false)) {
                pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
                             rsc->id, pe__node_name(running));

            } else {
                int nodes_with_best_score = 1;

                for (GList *iter = nodes->next; iter; iter = iter->next) {
                    pe_node_t *allowed = (pe_node_t *) iter->data;

                    if (allowed->weight != chosen->weight) {
                        // The nodes are sorted by weight, so no more are equal
                        break;
                    }
                    if (pe__same_node(allowed, running)) {
                        // Scores are equal, so prefer the current node
                        chosen = allowed;
                    }
                    nodes_with_best_score++;
                }

                if (nodes_with_best_score > 1) {
                    do_crm_log(((chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO),
                               "Chose %s for %s from %d nodes with score %s",
                               pe__node_name(chosen), rsc->id,
                               nodes_with_best_score,
                               pcmk_readable_score(chosen->weight));
                }
            }
        }

        pe_rsc_trace(rsc, "Chose %s for %s from %d candidates",
                     pe__node_name(chosen), rsc->id, g_list_length(nodes));
    }

    result = pcmk__finalize_assignment(rsc, chosen, false);
    g_list_free(nodes);
    return result;
}

/*!
 * \internal
 * \brief Apply a "this with" colocation to a node's allowed node scores
 *
 * \param[in,out] data       Colocation to apply
 * \param[in,out] user_data  Resource being assigned
 */
static void
apply_this_with(gpointer data, gpointer user_data)
{
    pcmk__colocation_t *colocation = (pcmk__colocation_t *) data;
    pe_resource_t *rsc = (pe_resource_t *) user_data;

    GHashTable *archive = NULL;
    pe_resource_t *other = colocation->primary;

    // In certain cases, we will need to revert the node scores
    if ((colocation->dependent_role >= RSC_ROLE_PROMOTED)
        || ((colocation->score < 0) && (colocation->score > -INFINITY))) {
        archive = pcmk__copy_node_table(rsc->allowed_nodes);
    }

    if (pcmk_is_set(other->flags, pe_rsc_provisional)) {
        pe_rsc_trace(rsc,
                     "%s: Assigning colocation %s primary %s first "
                     "(score=%d role=%s)",
                     rsc->id, colocation->id, other->id,
                     colocation->score, role2text(colocation->dependent_role));
        other->cmds->assign(other, NULL);
    }

    // Apply the colocation score to this resource's allowed node scores
    rsc->cmds->apply_coloc_score(rsc, other, colocation, true);
    if ((archive != NULL)
        && !pcmk__any_node_available(rsc->allowed_nodes)) {
        pe_rsc_info(rsc,
                    "%s: Reverting scores from colocation with %s "
                    "because no nodes allowed",
                    rsc->id, other->id);
        g_hash_table_destroy(rsc->allowed_nodes);
        rsc->allowed_nodes = archive;
        archive = NULL;
    }
    if (archive != NULL) {
        g_hash_table_destroy(archive);
    }
}

/*!
 * \internal
 * \brief Update a Pacemaker Remote node once its connection has been assigned
 *
 * \param[in] connection  Connection resource that has been assigned
 */
static void
remote_connection_assigned(const pe_resource_t *connection)
{
    pe_node_t *remote_node = pe_find_node(connection->cluster->nodes,
                                          connection->id);

    CRM_CHECK(remote_node != NULL, return);

    if ((connection->allocated_to != NULL)
        && (connection->next_role != RSC_ROLE_STOPPED)) {

        crm_trace("Pacemaker Remote node %s will be online",
                  remote_node->details->id);
        remote_node->details->online = TRUE;
        if (remote_node->details->unseen) {
            // Avoid unnecessary fence, since we will attempt connection
            remote_node->details->unclean = FALSE;
        }

    } else {
        crm_trace("Pacemaker Remote node %s will be shut down "
                  "(%sassigned connection's next role is %s)",
                  remote_node->details->id,
                  ((connection->allocated_to == NULL)? "un" : ""),
                  role2text(connection->next_role));
        remote_node->details->shutdown = TRUE;
    }
}

/*!
 * \internal
 * \brief Assign a primitive resource to a node
 *
 * \param[in,out] rsc     Resource to assign to a node
 * \param[in]     prefer  Node to prefer, if all else is equal
 *
 * \return Node that \p rsc is assigned to, if assigned entirely to one node
 */
pe_node_t *
pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer)
{
    GList *this_with_colocations = NULL;
    GList *with_this_colocations = NULL;
    GList *iter = NULL;
    pcmk__colocation_t *colocation = NULL;

    CRM_ASSERT(rsc != NULL);

    // Never assign a child without parent being assigned first
    if ((rsc->parent != NULL)
        && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) {
        pe_rsc_debug(rsc, "%s: Assigning parent %s first",
                     rsc->id, rsc->parent->id);
        rsc->parent->cmds->assign(rsc->parent, prefer);
    }

    if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
        return rsc->allocated_to; // Assignment has already been done
    }

    // Ensure we detect assignment loops
    if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) {
        pe_rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
        return NULL;
    }
    pe__set_resource_flags(rsc, pe_rsc_allocating);

    pe__show_node_weights(true, rsc, "Pre-assignment", rsc->allowed_nodes,
                          rsc->cluster);

    this_with_colocations = pcmk__this_with_colocations(rsc);
    with_this_colocations = pcmk__with_this_colocations(rsc);

    // Apply mandatory colocations first, to satisfy as many as possible
    for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;
        if ((colocation->score <= -CRM_SCORE_INFINITY)
            || (colocation->score >= CRM_SCORE_INFINITY)) {
            apply_this_with(iter->data, rsc);
        }
    }
    for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;
        if ((colocation->score <= -CRM_SCORE_INFINITY)
            || (colocation->score >= CRM_SCORE_INFINITY)) {
            pcmk__add_dependent_scores(iter->data, rsc);
        }
    }

    pe__show_node_weights(true, rsc, "Mandatory-colocations",
                          rsc->allowed_nodes, rsc->cluster);

    // Then apply optional colocations
    for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;

        if ((colocation->score > -CRM_SCORE_INFINITY)
            && (colocation->score < CRM_SCORE_INFINITY)) {
            apply_this_with(iter->data, rsc);
        }
    }
    for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
        colocation = iter->data;

        if ((colocation->score > -CRM_SCORE_INFINITY)
            && (colocation->score < CRM_SCORE_INFINITY)) {
            pcmk__add_dependent_scores(iter->data, rsc);
        }
    }

    g_list_free(this_with_colocations);
    g_list_free(with_this_colocations);

    if (rsc->next_role == RSC_ROLE_STOPPED) {
        pe_rsc_trace(rsc,
                     "Banning %s from all nodes because it will be stopped",
                     rsc->id);
        resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE,
                          rsc->cluster);

    } else if ((rsc->next_role > rsc->role)
               && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_quorum)
               && (rsc->cluster->no_quorum_policy == no_quorum_freeze)) {
        crm_notice("Resource %s cannot be elevated from %s to %s due to "
                   "no-quorum-policy=freeze",
                   rsc->id, role2text(rsc->role), role2text(rsc->next_role));
        pe__set_next_role(rsc, rsc->role, "no-quorum-policy=freeze");
    }

    pe__show_node_weights(!pcmk_is_set(rsc->cluster->flags, pe_flag_show_scores),
                          rsc, __func__, rsc->allowed_nodes, rsc->cluster);

    // Unmanage resource if fencing is enabled but no device is configured
    if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
        && !pcmk_is_set(rsc->cluster->flags, pe_flag_have_stonith_resource)) {
        pe__clear_resource_flags(rsc, pe_rsc_managed);
    }

    if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
        // Unmanaged resources stay on their current node
        const char *reason = NULL;
        pe_node_t *assign_to = NULL;

        pe__set_next_role(rsc, rsc->role, "unmanaged");
        assign_to = pe__current_node(rsc);
        if (assign_to == NULL) {
            reason = "inactive";
        } else if (rsc->role == RSC_ROLE_PROMOTED) {
            reason = "promoted";
        } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
            reason = "failed";
        } else {
            reason = "active";
        }
        pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
                    (assign_to? assign_to->details->uname : "no node"), reason);
        pcmk__finalize_assignment(rsc, assign_to, true);

    } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) {
        pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id);
        pcmk__finalize_assignment(rsc, NULL, true);

    } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional)
               && assign_best_node(rsc, prefer)) {
        // Assignment successful

    } else if (rsc->allocated_to == NULL) {
        if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
            pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
        } else if (rsc->running_on != NULL) {
            pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
        }

    } else {
        pe_rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id,
                     pe__node_name(rsc->allocated_to));
    }

    pe__clear_resource_flags(rsc, pe_rsc_allocating);

    if (rsc->is_remote_node) {
        remote_connection_assigned(rsc);
    }

    return rsc->allocated_to;
}
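
/* Illustrative usage sketch (editor's addition, not from the upstream file):
 * the scheduler invokes assignment polymorphically through each resource's
 * method table, so pcmk__primitive_assign() is typically reached as
 * rsc->cmds->assign(rsc, NULL). The guard macro is hypothetical and never
 * defined, so this example does not affect compilation.
 */
#ifdef PCMK__EXAMPLE_ASSIGN_ALL
static void
example_assign_all(pe_working_set_t *data_set)
{
    for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
        pe_resource_t *rsc = (pe_resource_t *) iter->data;

        rsc->cmds->assign(rsc, NULL); // NULL means no preferred node
    }
}
#endif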

/*!
 * \internal
 * \brief Schedule actions to bring resource down and back to current role
 *
 * \param[in,out] rsc           Resource to restart
 * \param[in,out] current       Node that resource should be brought down on
 * \param[in]     need_stop     Whether the resource must be stopped
 * \param[in]     need_promote  Whether the resource must be promoted
 */
static void
schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
                         bool need_stop, bool need_promote)
{
    enum rsc_role_e role = rsc->role;
    enum rsc_role_e next_role;
    rsc_transition_fn fn = NULL;

    pe__set_resource_flags(rsc, pe_rsc_restarting);

    // Bring resource down to a stop on its current node
    while (role != RSC_ROLE_STOPPED) {
        next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
        pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
                     (need_stop? "required" : "optional"), rsc->id,
                     role2text(role), role2text(next_role));
        fn = rsc_action_matrix[role][next_role];
        if (fn == NULL) {
            break;
        }
        fn(rsc, current, !need_stop);
        role = next_role;
    }

    // Bring resource up to its next role on its next node
    while ((rsc->role <= rsc->next_role) && (role != rsc->role)
           && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
        bool required = need_stop;

        next_role = rsc_state_matrix[role][rsc->role];
        if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
            required = true;
        }
        pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
                     (required? "required" : "optional"), rsc->id,
                     role2text(role), role2text(next_role));
        fn = rsc_action_matrix[role][next_role];
        if (fn == NULL) {
            break;
        }
        fn(rsc, rsc->allocated_to, !required);
        role = next_role;
    }

    pe__clear_resource_flags(rsc, pe_rsc_restarting);
}

/*!
 * \internal
 * \brief If a resource's next role is not explicitly specified, set a default
 *
 * \param[in,out] rsc  Resource to set next role for
 *
 * \return "explicit" if next role was explicitly set, otherwise "implicit"
 */
static const char *
set_default_next_role(pe_resource_t *rsc)
{
    if (rsc->next_role != RSC_ROLE_UNKNOWN) {
        return "explicit";
    }

    if (rsc->allocated_to == NULL) {
        pe__set_next_role(rsc, RSC_ROLE_STOPPED, "assignment");
    } else {
        pe__set_next_role(rsc, RSC_ROLE_STARTED, "assignment");
    }
    return "implicit";
}

/*!
 * \internal
 * \brief Create an action to represent an already pending start
 *
 * \param[in,out] rsc  Resource to create start action for
 */
static void
create_pending_start(pe_resource_t *rsc)
{
    pe_action_t *start = NULL;

    pe_rsc_trace(rsc,
                 "Creating action for %s to represent already pending start",
                 rsc->id);
    start = start_action(rsc, rsc->allocated_to, TRUE);
    pe__set_action_flags(start, pe_action_print_always);
}

/*!
 * \internal
 * \brief Schedule actions needed to take a resource to its next role
 *
 * \param[in,out] rsc  Resource to schedule actions for
 */
static void
schedule_role_transition_actions(pe_resource_t *rsc)
{
    enum rsc_role_e role = rsc->role;

    while (role != rsc->next_role) {
        enum rsc_role_e next_role = rsc_state_matrix[role][rsc->next_role];
        rsc_transition_fn fn = NULL;

        pe_rsc_trace(rsc,
                     "Creating action to take %s from %s to %s (ending at %s)",
                     rsc->id, role2text(role), role2text(next_role),
                     role2text(rsc->next_role));
        fn = rsc_action_matrix[role][next_role];
        if (fn == NULL) {
            break;
        }
        fn(rsc, rsc->allocated_to, false);
        role = next_role;
    }
}

/*!
 * \internal
 * \brief Create all actions needed for a given primitive resource
 *
 * \param[in,out] rsc  Primitive resource to create actions for
 */
void
pcmk__primitive_create_actions(pe_resource_t *rsc)
{
    bool need_stop = false;
    bool need_promote = false;
    bool is_moving = false;
    bool allow_migrate = false;
    bool multiply_active = false;

    pe_node_t *current = NULL;
    unsigned int num_all_active = 0;
    unsigned int num_clean_active = 0;
    const char *next_role_source = NULL;

    CRM_ASSERT(rsc != NULL);

    next_role_source = set_default_next_role(rsc);
    pe_rsc_trace(rsc,
                 "Creating all actions for %s transition from %s to %s "
                 "(%s) on %s",
                 rsc->id, role2text(rsc->role), role2text(rsc->next_role),
                 next_role_source, pe__node_name(rsc->allocated_to));

    current = rsc->fns->active_node(rsc, &num_all_active, &num_clean_active);

    g_list_foreach(rsc->dangling_migrations, pcmk__abort_dangling_migration,
                   rsc);

    if ((current != NULL) && (rsc->allocated_to != NULL)
        && (current->details != rsc->allocated_to->details)
        && (rsc->next_role >= RSC_ROLE_STARTED)) {

        pe_rsc_trace(rsc, "Moving %s from %s to %s",
                     rsc->id, pe__node_name(current),
                     pe__node_name(rsc->allocated_to));
        is_moving = true;
        allow_migrate = pcmk__rsc_can_migrate(rsc, current);

        // This is needed even if migrating (though I'm not sure why ...)
        need_stop = true;
    }

    // Check whether resource is partially migrated and/or multiply active
    if ((rsc->partial_migration_source != NULL)
        && (rsc->partial_migration_target != NULL)
        && allow_migrate && (num_all_active == 2)
        && pe__same_node(current, rsc->partial_migration_source)
        && pe__same_node(rsc->allocated_to, rsc->partial_migration_target)) {
        /* A partial migration is in progress, and the migration target remains
         * the same as when the migration began.
         */
        pe_rsc_trace(rsc, "Partial migration of %s from %s to %s will continue",
                     rsc->id, pe__node_name(rsc->partial_migration_source),
                     pe__node_name(rsc->partial_migration_target));

    } else if ((rsc->partial_migration_source != NULL)
               || (rsc->partial_migration_target != NULL)) {
        // A partial migration is in progress but can't be continued

        if (num_all_active > 2) {
            // The resource is migrating *and* multiply active!
            crm_notice("Forcing recovery of %s because it is migrating "
                       "from %s to %s and possibly active elsewhere",
                       rsc->id, pe__node_name(rsc->partial_migration_source),
                       pe__node_name(rsc->partial_migration_target));
        } else {
            // The migration source or target isn't available
            crm_notice("Forcing recovery of %s because it can no longer "
                       "migrate from %s to %s",
                       rsc->id, pe__node_name(rsc->partial_migration_source),
                       pe__node_name(rsc->partial_migration_target));
        }
        need_stop = true;
        rsc->partial_migration_source = rsc->partial_migration_target = NULL;
        allow_migrate = false;

    } else if (pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)) {
        multiply_active = (num_all_active > 1);
    } else {
        /* If a resource has "requires" set to nothing or quorum, don't consider
         * it active on unclean nodes (similar to how all resources behave when
         * stonith-enabled is false). We can start such resources elsewhere
         * before fencing completes, and if we considered the resource active on
         * the failed node, we would attempt recovery for being active on
         * multiple nodes.
         */
        multiply_active = (num_clean_active > 1);
    }

    if (multiply_active) {
        const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);

        // Resource was (possibly) incorrectly multiply active
        pe_proc_err("%s resource %s might be active on %u nodes (%s)",
                    pcmk__s(class, "Untyped"), rsc->id, num_all_active,
                    recovery2text(rsc->recovery_type));
        crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ"
                   "#Resource_is_Too_Active for more information");

        switch (rsc->recovery_type) {
            case recovery_stop_start:
                need_stop = true;
                break;
            case recovery_stop_unexpected:
                need_stop = true; // stop_resource() will skip expected node
                pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
                break;
            default:
                break;
        }

    } else {
        pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
    }

    if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
        create_pending_start(rsc);
    }

    if (is_moving) {
        // Remaining tests are only for resources staying where they are

    } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
        if (pcmk_is_set(rsc->flags, pe_rsc_stop)) {
            need_stop = true;
            pe_rsc_trace(rsc, "Recovering %s", rsc->id);
        } else {
            pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
            if (rsc->next_role == RSC_ROLE_PROMOTED) {
                need_promote = true;
            }
        }

    } else if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
        pe_rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
        need_stop = true;

    } else if ((rsc->role > RSC_ROLE_STARTED) && (current != NULL)
               && (rsc->allocated_to != NULL)) {
        pe_action_t *start = NULL;

        pe_rsc_trace(rsc, "Creating start action for promoted resource %s",
                     rsc->id);
        start = start_action(rsc, rsc->allocated_to, TRUE);
        if (!pcmk_is_set(start->flags, pe_action_optional)) {
            // Recovery of a promoted resource
            pe_rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
            need_stop = true;
        }
    }

    // Create any actions needed to bring resource down and back up to same role
    schedule_restart_actions(rsc, current, need_stop, need_promote);

    // Create any actions needed to take resource from this role to the next
    schedule_role_transition_actions(rsc);

    pcmk__create_recurring_actions(rsc);

    if (allow_migrate) {
        pcmk__create_migration_actions(rsc, current);
    }
}

/*!
 * \internal
 * \brief Ban a resource from any allowed nodes that are Pacemaker Remote nodes
 *
 * \param[in] rsc  Resource to check
 */
static void
rsc_avoids_remote_nodes(const pe_resource_t *rsc)
{
    GHashTableIter iter;
    pe_node_t *node = NULL;

    g_hash_table_iter_init(&iter, rsc->allowed_nodes);
    while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
        if (node->details->remote_rsc != NULL) {
            node->weight = -INFINITY;
        }
    }
}

/*!
 * \internal
 * \brief Return allowed nodes as (possibly sorted) list
 *
 * Convert a resource's hash table of allowed nodes to a list. If printing to
 * stdout, sort the list, to keep action ID numbers consistent for regression
 * test output (while avoiding the performance hit on a live cluster).
 *
 * \param[in] rsc  Resource to check for allowed nodes
 *
 * \return List of resource's allowed nodes
 * \note Callers should take care not to rely on the list being sorted.
 */
static GList *
allowed_nodes_as_list(const pe_resource_t *rsc)
{
    GList *allowed_nodes = NULL;

    if (rsc->allowed_nodes) {
        allowed_nodes = g_hash_table_get_values(rsc->allowed_nodes);
    }

    if (!pcmk__is_daemon) {
        allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
    }

    return allowed_nodes;
}

/*!
 * \internal
 * \brief Create implicit constraints needed for a primitive resource
 *
 * \param[in,out] rsc  Primitive resource to create implicit constraints for
 */
void
pcmk__primitive_internal_constraints(pe_resource_t *rsc)
{
    GList *allowed_nodes = NULL;
    bool check_unfencing = false;
    bool check_utilization = false;

    CRM_ASSERT(rsc != NULL);

    if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
        pe_rsc_trace(rsc,
                     "Skipping implicit constraints for unmanaged resource %s",
                     rsc->id);
        return;
    }

    // Whether resource requires unfencing
    check_unfencing = !pcmk_is_set(rsc->flags, pe_rsc_fence_device)
                      && pcmk_is_set(rsc->cluster->flags, pe_flag_enable_unfencing)
                      && pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing);

    // Whether a non-default placement strategy is used
    check_utilization = (g_hash_table_size(rsc->utilization) > 0)
                         && !pcmk__str_eq(rsc->cluster->placement_strategy,
                                          "default", pcmk__str_casei);

    // Order stops before starts (i.e. restart)
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                       rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
                       pe_order_optional|pe_order_implies_then|pe_order_restart,
                       rsc->cluster);

    // Promotable ordering: demote before stop, start before promote
    if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
                    pe_rsc_promotable)
        || (rsc->role > RSC_ROLE_UNPROMOTED)) {

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_DEMOTE, 0), NULL,
                           rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                           pe_order_promoted_implies_first, rsc->cluster);

        pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_START, 0), NULL,
                           rsc, pcmk__op_key(rsc->id, RSC_PROMOTE, 0), NULL,
                           pe_order_runnable_left, rsc->cluster);
    }

    // Don't clear resource history if probing on same node
    pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0),
                       NULL, rsc, pcmk__op_key(rsc->id, RSC_STATUS, 0),
                       NULL, pe_order_same_node|pe_order_then_cancels_first,
                       rsc->cluster);

    // Certain checks need allowed nodes
    if (check_unfencing || check_utilization || (rsc->container != NULL)) {
        allowed_nodes = allowed_nodes_as_list(rsc);
    }

    if (check_unfencing) {
        g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
    }

    if (check_utilization) {
        pcmk__create_utilization_constraints(rsc, allowed_nodes);
    }

    if (rsc->container != NULL) {
        pe_resource_t *remote_rsc = NULL;

        if (rsc->is_remote_node) {
            // rsc is the implicit remote connection for a guest or bundle node

            /* Guest resources are not allowed to run on Pacemaker Remote nodes,
             * to avoid nesting remotes. However, bundles are allowed.
             */
            if (!pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
                rsc_avoids_remote_nodes(rsc->container);
            }

            /* If someone cleans up a guest or bundle node's container, we will
             * likely schedule a (re-)probe of the container and recovery of the
             * connection. Order the connection stop after the container probe,
             * so that if we detect the container running, we will trigger a new
             * transition and avoid the unnecessary recovery.
             */
            pcmk__order_resource_actions(rsc->container, RSC_STATUS, rsc,
                                         RSC_STOP, pe_order_optional);

        /* A user can specify that a resource must start on a Pacemaker Remote
         * node by explicitly configuring it with the container=NODENAME
         * meta-attribute. This is of questionable merit, since location
         * constraints can accomplish the same thing. But we support it, so here
         * we check whether a resource (that is not itself a remote connection)
         * has container set to a remote node or guest node resource.
         */
        } else if (rsc->container->is_remote_node) {
            remote_rsc = rsc->container;
        } else {
            remote_rsc = pe__resource_contains_guest_node(rsc->cluster,
                                                          rsc->container);
        }

        if (remote_rsc != NULL) {
            /* Force the resource on the Pacemaker Remote node instead of
             * colocating the resource with the container resource.
             */
            for (GList *item = allowed_nodes; item; item = item->next) {
                pe_node_t *node = item->data;

                if (node->details->remote_rsc != remote_rsc) {
                    node->weight = -INFINITY;
                }
            }

        } else {
            /* This resource is either a filler for a container that does NOT
             * represent a Pacemaker Remote node, or a Pacemaker Remote
             * connection resource for a guest node or bundle.
             */
            int score;

            crm_trace("Order and colocate %s relative to its container %s",
                      rsc->id, rsc->container->id);

            pcmk__new_ordering(rsc->container,
                               pcmk__op_key(rsc->container->id, RSC_START, 0),
                               NULL, rsc, pcmk__op_key(rsc->id, RSC_START, 0),
                               NULL,
                               pe_order_implies_then|pe_order_runnable_left,
                               rsc->cluster);

            pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_STOP, 0), NULL,
                               rsc->container,
                               pcmk__op_key(rsc->container->id, RSC_STOP, 0),
                               NULL, pe_order_implies_first, rsc->cluster);

            if (pcmk_is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
                score = 10000;    /* Highly preferred but not essential */
            } else {
                score = INFINITY; /* Force them to run on the same host */
            }
            pcmk__new_colocation("resource-with-container", NULL, score, rsc,
                                 rsc->container, NULL, NULL, true,
                                 rsc->cluster);
        }
    }

    if (rsc->is_remote_node || pcmk_is_set(rsc->flags, pe_rsc_fence_device)) {
        /* Remote connections and fencing devices are not allowed to run on
         * Pacemaker Remote nodes
         */
        rsc_avoids_remote_nodes(rsc);
    }
    g_list_free(allowed_nodes);
}
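
/* Illustrative configuration sketch (editor's addition, not from the upstream
 * file): the container=NODENAME meta-attribute discussed above would appear
 * in the CIB roughly as follows (all ids and values here are made-up
 * examples):
 *
 *   <primitive id="rsc1" class="ocf" provider="heartbeat" type="Dummy">
 *     <meta_attributes id="rsc1-meta">
 *       <nvpair id="rsc1-container" name="container" value="guest1"/>
 *     </meta_attributes>
 *   </primitive>
 */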

/*!
 * \internal
 * \brief Apply a colocation's score to node weights or resource priority
 *
 * Given a colocation constraint, apply its score to the dependent's
 * allowed node weights (if we are still placing resources) or priority (if
 * we are choosing promotable clone instance roles).
 *
 * \param[in,out] dependent      Dependent resource in colocation
 * \param[in]     primary        Primary resource in colocation
 * \param[in]     colocation     Colocation constraint to apply
 * \param[in]     for_dependent  true if called on behalf of dependent
 */
void
pcmk__primitive_apply_coloc_score(pe_resource_t *dependent,
                                  const pe_resource_t *primary,
                                  const pcmk__colocation_t *colocation,
                                  bool for_dependent)
{
    enum pcmk__coloc_affects filter_results;

    CRM_CHECK((colocation != NULL) && (dependent != NULL) && (primary != NULL),
              return);

    if (for_dependent) {
        // Always process on behalf of primary resource
        primary->cmds->apply_coloc_score(dependent, primary, colocation, false);
        return;
    }

    filter_results = pcmk__colocation_affects(dependent, primary, colocation,
                                              false);
    pe_rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
                 ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
                 dependent->id, primary->id, colocation->id, colocation->score,
                 filter_results);

    switch (filter_results) {
        case pcmk__coloc_affects_role:
            pcmk__apply_coloc_to_priority(dependent, primary, colocation);
            break;
        case pcmk__coloc_affects_location:
            pcmk__apply_coloc_to_weights(dependent, primary, colocation);
            break;
        default: // pcmk__coloc_affects_nothing
            return;
    }
}

/* Primitive implementation of
 * resource_alloc_functions_t:with_this_colocations()
 */
void
pcmk__with_primitive_colocations(const pe_resource_t *rsc,
                                 const pe_resource_t *orig_rsc, GList **list)
{
    // Primitives don't have children, so rsc should also be orig_rsc
    CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native)
              && (rsc == orig_rsc) && (list != NULL),
              return);

    // Add primitive's own colocations plus any relevant ones from parent
    pcmk__add_with_this_list(list, rsc->rsc_cons_lhs);
    if (rsc->parent != NULL) {
        rsc->parent->cmds->with_this_colocations(rsc->parent, rsc, list);
    }
}

/* Primitive implementation of
 * resource_alloc_functions_t:this_with_colocations()
 */
void
pcmk__primitive_with_colocations(const pe_resource_t *rsc,
                                 const pe_resource_t *orig_rsc, GList **list)
{
    // Primitives don't have children, so rsc should also be orig_rsc
    CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native)
              && (rsc == orig_rsc) && (list != NULL),
              return);

    // Add primitive's own colocations plus any relevant ones from parent
    pcmk__add_this_with_list(list, rsc->rsc_cons);
    if (rsc->parent != NULL) {
        rsc->parent->cmds->this_with_colocations(rsc->parent, rsc, list);
    }
}

/*!
 * \internal
 * \brief Return action flags for a given primitive resource action
 *
 * \param[in,out] action  Action to get flags for
 * \param[in]     node    If not NULL, limit effects to this node (ignored)
 *
 * \return Flags appropriate to \p action on \p node
 */
enum pe_action_flags
pcmk__primitive_action_flags(pe_action_t *action, const pe_node_t *node)
{
    CRM_ASSERT(action != NULL);
    return action->flags;
}

/*!
 * \internal
 * \brief Check whether a node is a multiply active resource's expected node
 *
 * \param[in] rsc   Resource to check
 * \param[in] node  Node to check
 *
 * \return true if \p rsc is multiply active with multiple-active set to
 *         stop_unexpected, and \p node is the node where it will remain active
 * \note This assumes that the resource's next role cannot be changed to stopped
 *       after this is called, which should be reasonable if status has already
 *       been unpacked and resources have been assigned to nodes.
 */
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
    return pcmk_all_flags_set(rsc->flags,
                              pe_rsc_stop_unexpected|pe_rsc_restarting)
           && (rsc->next_role > RSC_ROLE_STOPPED)
           && pe__same_node(rsc->allocated_to, node);
}
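
/* Illustrative configuration sketch (editor's addition, not from the upstream
 * file): the pe_rsc_stop_unexpected flag checked above corresponds to a
 * resource configured with multiple-active="stop_unexpected", e.g. (ids are
 * made-up examples):
 *
 *   <primitive id="rsc1" class="ocf" provider="heartbeat" type="Dummy">
 *     <meta_attributes id="rsc1-meta">
 *       <nvpair id="rsc1-ma" name="multiple-active" value="stop_unexpected"/>
 *     </meta_attributes>
 *   </primitive>
 */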

/*!
 * \internal
 * \brief Schedule actions needed to stop a resource wherever it is active
 *
 * \param[in,out] rsc       Resource being stopped
 * \param[in]     node      Node where resource is being stopped (ignored)
 * \param[in]     optional  Whether actions should be optional
 */
static void
stop_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
    for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
        pe_node_t *current = (pe_node_t *) iter->data;
        pe_action_t *stop = NULL;

        if (is_expected_node(rsc, current)) {
            /* We are scheduling restart actions for a multiply active resource
             * with multiple-active=stop_unexpected, and this is where it should
             * not be stopped.
             */
            pe_rsc_trace(rsc,
                         "Skipping stop of multiply active resource %s "
                         "on expected node %s",
                         rsc->id, pe__node_name(current));
            continue;
        }

        if (rsc->partial_migration_target != NULL) {
            // Continue migration if node originally was and remains target
            if (pe__same_node(current, rsc->partial_migration_target)
                && pe__same_node(current, rsc->allocated_to)) {
                pe_rsc_trace(rsc,
                             "Skipping stop of %s on %s "
                             "because partial migration there will continue",
                             rsc->id, pe__node_name(current));
                continue;
            } else {
                pe_rsc_trace(rsc,
                             "Forcing stop of %s on %s "
                             "because migration target changed",
                             rsc->id, pe__node_name(current));
                optional = false;
            }
        }

        pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
                     rsc->id, pe__node_name(current));
        stop = stop_action(rsc, current, optional);

        if (rsc->allocated_to == NULL) {
            pe_action_set_reason(stop, "node availability", true);
        } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
                                                  |pe_rsc_stop_unexpected)) {
            /* We are stopping a multiply active resource on a node that is
             * not its expected node, and we are still scheduling restart
             * actions, so the stop is for being multiply active.
             */
            pe_action_set_reason(stop, "being multiply active", true);
        }

        if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
            pe__clear_action_flags(stop, pe_action_runnable);
        }

        if (pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop)) {
            pcmk__schedule_cleanup(rsc, current, optional);
        }

        if (pcmk_is_set(rsc->flags, pe_rsc_needs_unfencing)) {
            pe_action_t *unfence = pe_fence_op(current, "on", true, NULL, false,
                                               rsc->cluster);

            order_actions(stop, unfence, pe_order_implies_first);
            if (!pcmk__node_unfenced(current)) {
                pe_proc_err("Stopping %s until %s can be unfenced",
                            rsc->id, pe__node_name(current));
            }
        }
    }
}

/*!
 * \internal
 * \brief Schedule actions needed to start a resource on a node
 *
 * \param[in,out] rsc       Resource being started
 * \param[in,out] node      Node where resource should be started
 * \param[in]     optional  Whether actions should be optional
 */
static void
start_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
{
    pe_action_t *start = NULL;

    CRM_ASSERT(node != NULL);

    pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
                 (optional? "optional" : "required"), rsc->id,
                 pe__node_name(node), node->weight);
    start = start_action(rsc, node, TRUE);

    pcmk__order_vs_unfence(rsc, node, start, pe_order_implies_then);

    if (pcmk_is_set(start->flags, pe_action_runnable) && !optional) {
        pe__clear_action_flags(start, pe_action_optional);
    }

    if (is_expected_node(rsc, node)) {
        /* This could be a problem if the start becomes necessary for other
         * reasons later.
         */
        pe_rsc_trace(rsc,
                     "Start of multiply active resource %s "
                     "on expected node %s will be a pseudo-action",
                     rsc->id, pe__node_name(node));
        pe__set_action_flags(start, pe_action_pseudo);
    }
}
1259 
1260 /*!
1261  * \internal
1262  * \brief Schedule actions needed to promote a resource on a node
1263  *
1264  * \param[in,out] rsc       Resource being promoted
1265  * \param[in]     node      Node where resource should be promoted
1266  * \param[in]     optional  Whether actions should be optional
1267  */
1268 static void
1269 promote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
1270 {
1271     GList *iter = NULL;
1272     GList *action_list = NULL;
1273     bool runnable = true;
1274 
1275     CRM_ASSERT(node != NULL);
1276 
1277     // Any start must be runnable for promotion to be runnable
1278     action_list = pe__resource_actions(rsc, node, RSC_START, true);
1279     for (iter = action_list; iter != NULL; iter = iter->next) {
1280         pe_action_t *start = (pe_action_t *) iter->data;
1281 
1282         if (!pcmk_is_set(start->flags, pe_action_runnable)) {
1283             runnable = false;
1284         }
1285     }
1286     g_list_free(action_list);
1287 
1288     if (runnable) {
1289         pe_action_t *promote = promote_action(rsc, node, optional);
1290 
1291         pe_rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
1292                      (optional? "optional" : "required"), rsc->id,
1293                      pe__node_name(node));
1294 
1295         if (is_expected_node(rsc, node)) {
1296             /* This could be a problem if the promote becomes necessary for
1297              * other reasons later.
1298              */
1299             pe_rsc_trace(rsc,
1300                          "Promotion of multiply active resource %s "
1301                          "on expected node %s will be a pseudo-action",
1302                          rsc->id, pe__node_name(node));
1303             pe__set_action_flags(promote, pe_action_pseudo);
1304         }
1305     } else {
1306         pe_rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
1307                      rsc->id, pe__node_name(node));
1308         action_list = pe__resource_actions(rsc, node, RSC_PROMOTE, true);
1309         for (iter = action_list; iter != NULL; iter = iter->next) {
1310             pe_action_t *promote = (pe_action_t *) iter->data;
1311 
1312             pe__clear_action_flags(promote, pe_action_runnable);
1313         }
1314         g_list_free(action_list);
1315     }
1316 }
1317 
1318 /*!
1319  * \internal
1320  * \brief Schedule actions needed to demote a resource wherever it is active
1321  *
1322  * \param[in,out] rsc       Resource being demoted
1323  * \param[in]     node      Node where resource should be demoted (ignored)
1324  * \param[in]     optional  Whether actions should be optional
1325  */
1326 static void
1327 demote_resource(pe_resource_t *rsc, pe_node_t *node, bool optional)
1328 {
1329     /* Since this will only be called for a primitive (possibly as an instance
1330      * of a collective resource), the resource is multiply active if it is
1331      * running on more than one node, so we want to demote on all of them as
1332      * part of recovery, regardless of which one is the desired node.
1333      */
1334     for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
1335         pe_node_t *current = (pe_node_t *) iter->data;
1336 
1337         if (is_expected_node(rsc, current)) {
1338             pe_rsc_trace(rsc,
1339                          "Skipping demote of multiply active resource %s "
1340                          "on expected node %s",
1341                          rsc->id, pe__node_name(current));
1342         } else {
1343             pe_rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
1344                          (optional? "optional" : "required"), rsc->id,
1345                          pe__node_name(current));
1346             demote_action(rsc, current, optional);
1347         }
1348     }
1349 }
1350 
1351 static void
1352 assert_role_error(pe_resource_t *rsc, pe_node_t *node, bool optional)
1353 {
1354     CRM_ASSERT(false);
1355 }
1356 
1357 /*!
1358  * \internal
1359  * \brief Schedule cleanup of a resource
1360  *
1361  * \param[in,out] rsc       Resource to clean up
1362  * \param[in]     node      Node to clean up on
1363  * \param[in]     optional  Whether clean-up should be optional
1364  */
1365 void
1366 pcmk__schedule_cleanup(pe_resource_t *rsc, const pe_node_t *node, bool optional)
1367 {
1368     /* If the cleanup is required, its orderings are optional, because they're
1369      * relevant only if both actions are required. Conversely, if the cleanup is
1370      * optional, the orderings make the then action required if the first action
1371      * becomes required.
1372      */
1373     uint32_t flag = optional? pe_order_implies_then : pe_order_optional;
1374 
1375     CRM_CHECK((rsc != NULL) && (node != NULL), return);
1376 
1377     if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
1378         pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
1379                      rsc->id, pe__node_name(node));
1380         return;
1381     }
1382 
1383     if (node->details->unclean || !node->details->online) {
1384         pe_rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
1385                      rsc->id, pe__node_name(node));
1386         return;
1387     }
1388 
1389     crm_notice("Scheduling clean-up of %s on %s", rsc->id, pe__node_name(node));
1390     delete_action(rsc, node, optional);
1391 
1392     // stop -> clean-up -> start
1393     pcmk__order_resource_actions(rsc, RSC_STOP, rsc, RSC_DELETE, flag);
1394     pcmk__order_resource_actions(rsc, RSC_DELETE, rsc, RSC_START, flag);
1395 }
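
/* A hypothetical caller, to illustrate the flag choice above: with
 * optional == false (a required clean-up), the stop -> delete -> start
 * orderings use pe_order_optional and only take effect when both actions
 * are scheduled anyway; with optional == true, they use
 * pe_order_implies_then, so a required stop would make the delete
 * required, and a required delete would make the start required.
 *
 *     pcmk__schedule_cleanup(rsc, node, false); // required clean-up
 */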
1396 
1397 /*!
1398  * \internal
1399  * \brief Add primitive meta-attributes relevant to graph actions to XML
1400  *
1401  * \param[in]     rsc  Primitive resource whose meta-attributes should be added
1402  * \param[in,out] xml  Transition graph action attributes XML to add to
1403  */
1404 void
1405 pcmk__primitive_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml)
1406 {
1407     char *name = NULL;
1408     char *value = NULL;
1409     const pe_resource_t *parent = NULL;
1410 
1411     CRM_ASSERT((rsc != NULL) && (xml != NULL));
1412 
1413     /* Clone instance numbers get set internally as meta-attributes, and are
1414      * needed in the transition graph (for example, to tell unique clone
1415      * instances apart).
1416      */
1417     value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
1418     if (value != NULL) {
1419         name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
1420         crm_xml_add(xml, name, value);
1421         free(name);
1422     }
1423 
1424     // Not sure if this one is really needed ...
1425     value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
1426     if (value != NULL) {
1427         name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
1428         crm_xml_add(xml, name, value);
1429         free(name);
1430     }
1431 
1432     /* The container meta-attribute can be set on the primitive itself or one of
1433      * its parents (for example, a group inside a container resource), so check
1434      * them all, and keep the highest one found.
1435      */
1436     for (parent = rsc; parent != NULL; parent = parent->parent) {
1437         if (parent->container != NULL) {
1438             crm_xml_add(xml, CRM_META "_" XML_RSC_ATTR_CONTAINER,
1439                         parent->container->id);
1440         }
1441     }
1442 
1443     /* Bundle replica children will get their external-ip set internally as a
1444      * meta-attribute. The graph action needs it, but under a different naming
1445      * convention than other meta-attributes.
1446      */
1447     value = g_hash_table_lookup(rsc->meta, "external-ip");
1448     if (value != NULL) {
1449         crm_xml_add(xml, "pcmk_external_ip", value);
1450     }
1451 }
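
/* For example (assuming CRM_META is the usual "CRM_meta" prefix and the
 * clone instance meta-attribute is "clone"), a unique clone instance 2
 * running inside container "httpd-bundle-0" would end up with graph action
 * attributes along the lines of:
 *
 *     <attributes CRM_meta_clone="2"
 *                 CRM_meta_container="httpd-bundle-0" .../>
 *
 * since crm_meta_name() prepends the CRM_META prefix plus an underscore to
 * the meta-attribute name.
 */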
1452 
1453 // Primitive implementation of resource_alloc_functions_t:add_utilization()
1454 void
1455 pcmk__primitive_add_utilization(const pe_resource_t *rsc,
1456                                 const pe_resource_t *orig_rsc, GList *all_rscs,
1457                                 GHashTable *utilization)
1458 {
1459     if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
1460         return;
1461     }
1462 
1463     pe_rsc_trace(orig_rsc, "%s: Adding primitive %s as colocated utilization",
1464                  orig_rsc->id, rsc->id);
1465     pcmk__release_node_capacity(utilization, rsc);
1466 }
1467 
1468 /*!
1469  * \internal
1470  * \brief Get epoch time of node's shutdown attribute (or now if none)
1471  *
1472  * \param[in,out] node  Node to check
1473  *
1474  * \return Epoch time corresponding to shutdown attribute if set or now if not
1475  */
1476 static time_t
1477 shutdown_time(pe_node_t *node)
1478 {
1479     const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
1480     time_t result = 0;
1481 
1482     if (shutdown != NULL) {
1483         long long result_ll;
1484 
1485         if (pcmk__scan_ll(shutdown, &result_ll, 0LL) == pcmk_rc_ok) {
1486             result = (time_t) result_ll;
1487         }
1488     }
1489     return (result == 0)? get_effective_time(node->details->data_set) : result;
1490 }
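
/* A minimal sketch of the parsing above, with a made-up attribute value:
 *
 *     long long when = 0;
 *
 *     if (pcmk__scan_ll("1672531200", &when, 0LL) == pcmk_rc_ok) {
 *         // when == 1672531200 (2023-01-01 00:00:00 UTC)
 *     }
 *
 * A missing or unparsable attribute leaves the result at 0, in which case
 * the scheduler's effective "now" is returned instead.
 */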
1491 
1492 /*!
1493  * \internal
1494  * \brief Ban a resource from a node if it's not locked to the node
1495  *
1496  * \param[in]     data       Node to check
1497  * \param[in,out] user_data  Resource to check
1498  */
1499 static void
1500 ban_if_not_locked(gpointer data, gpointer user_data)
1501 {
1502     const pe_node_t *node = (const pe_node_t *) data;
1503     pe_resource_t *rsc = (pe_resource_t *) user_data;
1504 
1505     if (strcmp(node->details->uname, rsc->lock_node->details->uname) != 0) {
1506         resource_location(rsc, node, -CRM_SCORE_INFINITY,
1507                           XML_CONFIG_ATTR_SHUTDOWN_LOCK, rsc->cluster);
1508     }
1509 }
1510 
1511 // Primitive implementation of resource_alloc_functions_t:shutdown_lock()
1512 void
1513 pcmk__primitive_shutdown_lock(pe_resource_t *rsc)
1514 {
1515     const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
1516 
1517     // Fence devices and remote connections can't be locked
1518     if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_null_matches)
1519         || pe__resource_is_remote_conn(rsc, rsc->cluster)) {
1520         return;
1521     }
1522 
1523     if (rsc->lock_node != NULL) {
1524         // The lock was obtained from resource history
1525 
1526         if (rsc->running_on != NULL) {
1527             /* The resource was started elsewhere even though it is now
1528              * considered locked. This shouldn't be possible, but as a
1529              * failsafe, we don't want to disturb the resource now.
1530              */
1531             pe_rsc_info(rsc,
1532                         "Cancelling shutdown lock because %s is already active",
1533                         rsc->id);
1534             pe__clear_resource_history(rsc, rsc->lock_node, rsc->cluster);
1535             rsc->lock_node = NULL;
1536             rsc->lock_time = 0;
1537         }
1538 
1539     // Only a resource active on exactly one node can be locked
1540     } else if (pcmk__list_of_1(rsc->running_on)) {
1541         pe_node_t *node = rsc->running_on->data;
1542 
1543         if (node->details->shutdown) {
1544             if (node->details->unclean) {
1545                 pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
1546                              rsc->id, pe__node_name(node));
1547             } else {
1548                 rsc->lock_node = node;
1549                 rsc->lock_time = shutdown_time(node);
1550             }
1551         }
1552     }
1553 
1554     if (rsc->lock_node == NULL) {
1555         // No lock needed
1556         return;
1557     }
1558 
1559     if (rsc->cluster->shutdown_lock > 0) {
1560         time_t lock_expiration = rsc->lock_time + rsc->cluster->shutdown_lock;
1561 
1562         pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
1563                     rsc->id, pe__node_name(rsc->lock_node),
1564                     (long long) lock_expiration);
1565         pe__update_recheck_time(++lock_expiration, rsc->cluster);
1566     } else {
1567         pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
1568                     rsc->id, pe__node_name(rsc->lock_node));
1569     }
1570 
1571     // If resource is locked to one node, ban it from all other nodes
1572     g_list_foreach(rsc->cluster->nodes, ban_if_not_locked, rsc);
1573 }
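
/* Worked example with made-up numbers: if the node's shutdown attribute was
 * set at epoch 1000 and shutdown-lock-limit is 600 seconds, then
 *
 *     lock_expiration = 1000 + 600 = 1600
 *
 * and the recheck time is nudged to 1601 (note the pre-increment above), so
 * the scheduler re-runs just after the lock expires and can lift the bans
 * added by ban_if_not_locked().
 */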
