pacemaker  2.1.7-0f7f88312f
Scalable High-Availability cluster resource manager
pcmk_sched_promotable.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <crm/msg_xml.h>
13 #include <pacemaker-internal.h>
14 
15 #include "libpacemaker_private.h"
16 
/*!
 * \internal
 * \brief Order one clone instance's promotion within the clone's promotion
 *
 * \param clone  Clone being promoted
 * \param child  Instance of \p clone whose promote action is being ordered
 * \param last   Instance handled before \p child; only consulted when it is
 *               non-NULL and pe__clone_is_ordered(clone) is true
 *
 * NOTE(review): listing lines 30, 32-33, 35, 39 and 41 are absent from this
 * rendering, so the calls that consume the argument fragments below are not
 * visible here.
 */
25 static void
26 order_instance_promotion(pcmk_resource_t *clone, pcmk_resource_t *child,
27  pcmk_resource_t *last)
28 {
29  // "Promote clone" -> promote instance -> "clone promoted"
31  child, PCMK_ACTION_PROMOTE,
34  clone, PCMK_ACTION_PROMOTED,
36 
37  // If clone is ordered, order this instance relative to last
38  if ((last != NULL) && pe__clone_is_ordered(clone)) {
40  child, PCMK_ACTION_PROMOTE,
42  }
43 }
44 
/*!
 * \internal
 * \brief Order one clone instance's demotion within the clone's demotion
 *
 * \param clone  Clone being demoted
 * \param child  Instance of \p clone whose demote action is being ordered
 * \param last   Instance handled before \p child; only consulted when it is
 *               non-NULL and pe__clone_is_ordered(clone) is true
 *
 * NOTE(review): listing lines 58-61, 63 and 67-68 are absent from this
 * rendering, so the ordering calls themselves are not visible here.
 */
53 static void
54 order_instance_demotion(pcmk_resource_t *clone, pcmk_resource_t *child,
55  pcmk_resource_t *last)
56 {
57  // "Demote clone" -> demote instance -> "clone demoted"
62  clone, PCMK_ACTION_DEMOTED,
64 
65  // If clone is ordered, order this instance relative to last
66  if ((last != NULL) && pe__clone_is_ordered(clone)) {
69  }
70 }
71 
80 static void
81 check_for_role_change(const pcmk_resource_t *rsc, bool *demoting,
82  bool *promoting)
83 {
84  const GList *iter = NULL;
85 
86  // If this is a cloned group, check group members recursively
87  if (rsc->children != NULL) {
88  for (iter = rsc->children; iter != NULL; iter = iter->next) {
89  check_for_role_change((const pcmk_resource_t *) iter->data,
90  demoting, promoting);
91  }
92  return;
93  }
94 
95  for (iter = rsc->actions; iter != NULL; iter = iter->next) {
96  const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
97 
98  if (*promoting && *demoting) {
99  return;
100 
101  } else if (pcmk_is_set(action->flags, pcmk_action_optional)) {
102  continue;
103 
104  } else if (pcmk__str_eq(PCMK_ACTION_DEMOTE, action->task,
105  pcmk__str_none)) {
106  *demoting = true;
107 
108  } else if (pcmk__str_eq(PCMK_ACTION_PROMOTE, action->task,
109  pcmk__str_none)) {
110  *promoting = true;
111  }
112  }
113 }
114 
/*!
 * \internal
 * \brief Add promoted-role location constraint scores to an instance priority
 *
 * For each constraint in \p location_constraints whose role filter is the
 * promoted role and whose node list contains \p chosen, the matching node's
 * weight is added (via pcmk__add_scores()) to \p child's priority.
 *
 * \param[in,out] child                 Instance whose priority is updated
 * \param[in]     location_constraints  List of pe__location_t to consider
 * \param[in]     chosen                Node that \p child is assigned to
 *
 * NOTE(review): listing line 149 (one argument of the trace call) is absent
 * from this rendering.
 * NOTE(review): the trace call passes several pcmk_readable_score() results
 * at once; that function is described as returning a static string, so the
 * values may alias each other in the logged message -- confirm.
 */
127 static void
128 apply_promoted_locations(pcmk_resource_t *child,
129  const GList *location_constraints,
130  const pcmk_node_t *chosen)
131 {
132  for (const GList *iter = location_constraints; iter; iter = iter->next) {
133  const pe__location_t *location = iter->data;
134  const pcmk_node_t *constraint_node = NULL;
135 
        // Only constraints limited to the promoted role are relevant here
136  if (location->role_filter == pcmk_role_promoted) {
137  constraint_node = pe_find_node_id(location->node_list_rh,
138  chosen->details->id);
139  }
140  if (constraint_node != NULL) {
141  int new_priority = pcmk__add_scores(child->priority,
142  constraint_node->weight);
143 
144  pe_rsc_trace(child,
145  "Applying location %s to %s promotion priority on %s: "
146  "%s + %s = %s",
147  location->id, child->id,
148  pe__node_name(constraint_node),
150  pcmk_readable_score(constraint_node->weight),
151  pcmk_readable_score(new_priority));
152  child->priority = new_priority;
153  }
154  }
155 }
156 
/*!
 * \internal
 * \brief Get the node that an instance could be promoted on, if any
 *
 * The checks are applied in order: every child of \p rsc must itself be
 * promotable; \p rsc must have a location; an unmanaged \p rsc must already
 * be in the promoted role; a managed \p rsc must have a non-negative priority
 * and its node must pass pcmk__node_available(); the node must be in the
 * top-level parent's allowed nodes; and, for a managed \p rsc, that node's
 * count must be below pe__clone_promoted_node_max() for the parent.
 *
 * \param[in] rsc  Instance (or member of an instance) to check
 *
 * \return Entry for the node in the top-level parent's allowed_nodes table if
 *         \p rsc can be promoted there, otherwise NULL
 */
165 static pcmk_node_t *
166 node_to_be_promoted_on(const pcmk_resource_t *rsc)
167 {
168  pcmk_node_t *node = NULL;
169  pcmk_node_t *local_node = NULL;
170  const pcmk_resource_t *parent = NULL;
171 
172  // If this is a cloned group, bail if any group member can't be promoted
173  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
174  pcmk_resource_t *child = (pcmk_resource_t *) iter->data;
175 
176  if (node_to_be_promoted_on(child) == NULL) {
177  pe_rsc_trace(rsc,
178  "%s can't be promoted because member %s can't",
179  rsc->id, child->id);
180  return NULL;
181  }
182  }
183 
184  node = rsc->fns->location(rsc, NULL, FALSE);
185  if (node == NULL) {
186  pe_rsc_trace(rsc, "%s can't be promoted because it won't be active",
187  rsc->id);
188  return NULL;
189 
190  } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
        // An unmanaged instance is only eligible if it is already promoted
191  if (rsc->fns->state(rsc, TRUE) == pcmk_role_promoted) {
192  crm_notice("Unmanaged instance %s will be left promoted on %s",
193  rsc->id, pe__node_name(node));
194  } else {
195  pe_rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
196  rsc->id);
197  return NULL;
198  }
199 
200  } else if (rsc->priority < 0) {
201  pe_rsc_trace(rsc,
202  "%s can't be promoted because its promotion priority %d "
203  "is negative",
204  rsc->id, rsc->priority);
205  return NULL;
206 
207  } else if (!pcmk__node_available(node, false, true)) {
208  pe_rsc_trace(rsc, "%s can't be promoted because %s can't run resources",
209  rsc->id, pe__node_name(node));
210  return NULL;
211  }
212 
    // Use the parent's copy of the node, whose count tracks promotions so far
213  parent = pe__const_top_resource(rsc, false);
214  local_node = g_hash_table_lookup(parent->allowed_nodes, node->details->id);
215 
216  if (local_node == NULL) {
217  /* It should not be possible for the scheduler to have assigned the
218  * instance to a node where its parent is not allowed, but it's good to
219  * have a fail-safe.
220  */
221  if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
222  crm_warn("%s can't be promoted because %s is not allowed on %s "
223  "(scheduler bug?)",
224  rsc->id, parent->id, pe__node_name(node));
225  } // else the instance is unmanaged and already promoted
226  return NULL;
227 
228  } else if ((local_node->count >= pe__clone_promoted_node_max(parent))
229  && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
230  pe_rsc_trace(rsc,
231  "%s can't be promoted because %s has "
232  "maximum promoted instances already",
233  rsc->id, pe__node_name(node));
234  return NULL;
235  }
236 
237  return local_node;
238 }
239 
/*!
 * \internal
 * \brief Compare two promotable clone instances by promotion priority
 *
 * Used as the comparison function for g_list_sort() on a clone's children.
 * Instances are compared by sort index first, then by current role, and
 * finally by pcmk__cmp_instance().
 *
 * \param[in] a  First instance to compare (pcmk_resource_t *, must not be NULL)
 * \param[in] b  Second instance to compare (pcmk_resource_t *, must not be NULL)
 *
 * \return -1 if \p a has the higher sort index or (on a tie) the higher
 *         current role, 1 in the opposite cases, otherwise the result of
 *         pcmk__cmp_instance(a, b)
 *
 * NOTE(review): listing lines 264, 267, 270, 273, 281 and 287 (the starts and
 * trailing arguments of the trace calls) are absent from this rendering.
 */
251 static gint
252 cmp_promotable_instance(gconstpointer a, gconstpointer b)
253 {
254  const pcmk_resource_t *rsc1 = (const pcmk_resource_t *) a;
255  const pcmk_resource_t *rsc2 = (const pcmk_resource_t *) b;
256 
257  enum rsc_role_e role1 = pcmk_role_unknown;
258  enum rsc_role_e role2 = pcmk_role_unknown;
259 
260  CRM_ASSERT((rsc1 != NULL) && (rsc2 != NULL));
261 
262  // Check sort index set by pcmk__set_instance_roles()
263  if (rsc1->sort_index > rsc2->sort_index) {
265  "%s has higher promotion priority than %s "
266  "(sort index %d > %d)",
268  return -1;
269  } else if (rsc1->sort_index < rsc2->sort_index) {
271  "%s has lower promotion priority than %s "
272  "(sort index %d < %d)",
274  return 1;
275  }
276 
277  // If those are the same, prefer instance whose current role is higher
278  role1 = rsc1->fns->state(rsc1, TRUE);
279  role2 = rsc2->fns->state(rsc2, TRUE);
280  if (role1 > role2) {
282  "%s has higher promotion priority than %s "
283  "(higher current role)",
284  rsc1->id, rsc2->id);
285  return -1;
286  } else if (role1 < role2) {
288  "%s has lower promotion priority than %s "
289  "(lower current role)",
290  rsc1->id, rsc2->id);
291  return 1;
292  }
293 
294  // Finally, do normal clone instance sorting
295  return pcmk__cmp_instance(a, b);
296 }
297 
309 static void
310 add_sort_index_to_node_score(gpointer data, gpointer user_data)
311 {
312  const pcmk_resource_t *child = (const pcmk_resource_t *) data;
313  pcmk_resource_t *clone = (pcmk_resource_t *) user_data;
314 
315  pcmk_node_t *node = NULL;
316  const pcmk_node_t *chosen = NULL;
317 
318  if (child->sort_index < 0) {
319  pe_rsc_trace(clone, "Not adding sort index of %s: negative", child->id);
320  return;
321  }
322 
323  chosen = child->fns->location(child, NULL, FALSE);
324  if (chosen == NULL) {
325  pe_rsc_trace(clone, "Not adding sort index of %s: inactive", child->id);
326  return;
327  }
328 
329  node = g_hash_table_lookup(clone->allowed_nodes, chosen->details->id);
330  CRM_ASSERT(node != NULL);
331 
332  node->weight = pcmk__add_scores(child->sort_index, node->weight);
333  pe_rsc_trace(clone,
334  "Added cumulative priority of %s (%s) to score on %s (now %s)",
335  child->id, pcmk_readable_score(child->sort_index),
336  pe__node_name(node), pcmk_readable_score(node->weight));
337 }
338 
/*!
 * \internal
 * \brief Apply a colocation where the clone is the dependent to its node scores
 *
 * Written as a g_list_foreach() callback. Colocations whose dependent role is
 * not the promoted role are ignored. Otherwise the primary's
 * add_colocated_node_scores() method is invoked on the clone's allowed nodes,
 * scaled by the colocation score relative to INFINITY.
 *
 * \param[in,out] data       Colocation to apply (pcmk__colocation_t *)
 * \param[in,out] user_data  Promotable clone that is the colocation's
 *                           dependent (pcmk_resource_t *)
 *
 * NOTE(review): listing lines 352 and 359 are absent from this rendering; the
 * declaration of `flags` and the body of the `score < INFINITY` branch are
 * therefore not visible here.
 */
346 static void
347 apply_coloc_to_dependent(gpointer data, gpointer user_data)
348 {
349  pcmk__colocation_t *colocation = data;
350  pcmk_resource_t *clone = user_data;
351  pcmk_resource_t *primary = colocation->primary;
353  float factor = colocation->score / (float) INFINITY;
354 
355  if (colocation->dependent_role != pcmk_role_promoted) {
356  return;
357  }
358  if (colocation->score < INFINITY) {
360  }
361  pe_rsc_trace(clone, "Applying colocation %s (promoted %s with %s) @%s",
362  colocation->id, colocation->dependent->id,
363  colocation->primary->id,
364  pcmk_readable_score(colocation->score));
365  primary->cmds->add_colocated_node_scores(primary, clone, clone->id,
366  &clone->allowed_nodes, colocation,
367  factor, flags);
368 }
369 
/*!
 * \internal
 * \brief Apply a colocation where the clone is the primary to its node scores
 *
 * Written as a g_list_foreach() callback. The colocation is ignored unless
 * its primary role is the promoted role and pcmk__colocation_has_influence()
 * is true for it. Otherwise the dependent's add_colocated_node_scores()
 * method is invoked on the clone's allowed nodes, scaled by the colocation
 * score relative to INFINITY.
 *
 * \param[in,out] data       Colocation to apply (pcmk__colocation_t *)
 * \param[in,out] user_data  Promotable clone that is the colocation's primary
 *                           (pcmk_resource_t *)
 *
 * NOTE(review): listing line 385 (the rest of the `flags` initializer) is
 * absent from this rendering.
 */
377 static void
378 apply_coloc_to_primary(gpointer data, gpointer user_data)
379 {
380  pcmk__colocation_t *colocation = data;
381  pcmk_resource_t *clone = user_data;
382  pcmk_resource_t *dependent = colocation->dependent;
383  const float factor = colocation->score / (float) INFINITY;
384  const uint32_t flags = pcmk__coloc_select_active
386 
387  if ((colocation->primary_role != pcmk_role_promoted)
388  || !pcmk__colocation_has_influence(colocation, NULL)) {
389  return;
390  }
391 
392  pe_rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s",
393  colocation->id, colocation->dependent->id,
394  colocation->primary->id,
395  pcmk_readable_score(colocation->score));
396  dependent->cmds->add_colocated_node_scores(dependent, clone, clone->id,
397  &clone->allowed_nodes,
398  colocation, factor, flags);
399 }
400 
408 static void
409 set_sort_index_to_node_score(gpointer data, gpointer user_data)
410 {
411  pcmk_resource_t *child = (pcmk_resource_t *) data;
412  const pcmk_resource_t *clone = (const pcmk_resource_t *) user_data;
413 
414  pcmk_node_t *chosen = child->fns->location(child, NULL, FALSE);
415 
416  if (!pcmk_is_set(child->flags, pcmk_rsc_managed)
417  && (child->next_role == pcmk_role_promoted)) {
418  child->sort_index = INFINITY;
419  pe_rsc_trace(clone,
420  "Final sort index for %s is INFINITY (unmanaged promoted)",
421  child->id);
422 
423  } else if ((chosen == NULL) || (child->sort_index < 0)) {
424  pe_rsc_trace(clone,
425  "Final sort index for %s is %d (ignoring node score)",
426  child->id, child->sort_index);
427 
428  } else {
429  const pcmk_node_t *node = g_hash_table_lookup(clone->allowed_nodes,
430  chosen->details->id);
431 
432  CRM_ASSERT(node != NULL);
433  child->sort_index = node->weight;
434  pe_rsc_trace(clone,
435  "Adding scores for %s: final sort index for %s is %d",
436  clone->id, child->id, child->sort_index);
437  }
438 }
439 
/*!
 * \internal
 * \brief Sort a promotable clone's instances by descending promotion priority
 *
 * Visible steps: add each instance's sort index to its node's score, apply the
 * clone's colocations (both as dependent and as primary) to the clone's node
 * scores, copy the resulting node scores back into the instances' sort
 * indexes, then sort the children with cmp_promotable_instance().
 *
 * \param[in,out] clone  Promotable clone whose children are sorted
 *
 * NOTE(review): listing lines 451, 455, 478 and 488 are absent from this
 * rendering. They hold the start of the early-return condition, the statement
 * after it, the call for the ticket comment below, and the final statement.
 */
446 static void
447 sort_promotable_instances(pcmk_resource_t *clone)
448 {
449  GList *colocations = NULL;
450 
452  == pcmk_rc_already) {
453  return;
454  }
456 
457  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
458  pcmk_resource_t *child = (pcmk_resource_t *) iter->data;
459 
460  pe_rsc_trace(clone,
461  "Adding scores for %s: initial sort index for %s is %d",
462  clone->id, child->id, child->sort_index);
463  }
464  pe__show_node_scores(true, clone, "Before", clone->allowed_nodes,
465  clone->cluster);
466 
467  g_list_foreach(clone->children, add_sort_index_to_node_score, clone);
468 
    // Only the list containers are freed; the colocations are not
469  colocations = pcmk__this_with_colocations(clone);
470  g_list_foreach(colocations, apply_coloc_to_dependent, clone);
471  g_list_free(colocations);
472 
473  colocations = pcmk__with_this_colocations(clone);
474  g_list_foreach(colocations, apply_coloc_to_primary, clone);
475  g_list_free(colocations);
476 
477  // Ban resource from all nodes if it needs a ticket but doesn't have it
479 
480  pe__show_node_scores(true, clone, "After", clone->allowed_nodes,
481  clone->cluster);
482 
483  // Reset sort indexes to final node scores
484  g_list_foreach(clone->children, set_sort_index_to_node_score, clone);
485 
486  // Finally, sort instances in descending order of promotion priority
487  clone->children = g_list_sort(clone->children, cmp_promotable_instance);
489 }
490 
/*!
 * \internal
 * \brief Find the first instance of an anonymous clone matched on a node
 *
 * Each child of \p clone is searched with the clone's find_rsc() method for
 * \p id on \p node; the first non-NULL match is returned.
 *
 * \param[in] clone  Anonymous clone to search
 * \param[in] id     Instance ID to look for
 * \param[in] node   Node to look on
 *
 * \return First match found, or NULL if no child yields one
 *
 * NOTE(review): listing lines 511-512 (the remaining find_rsc() arguments,
 * presumably the search flags) are absent from this rendering.
 */
501 static pcmk_resource_t *
502 find_active_anon_instance(const pcmk_resource_t *clone, const char *id,
503  const pcmk_node_t *node)
504 {
505  for (GList *iter = clone->children; iter; iter = iter->next) {
506  pcmk_resource_t *child = iter->data;
507  pcmk_resource_t *active = NULL;
508 
509  // Use ->find_rsc() in case this is a cloned group
510  active = clone->fns->find_rsc(child, id, node,
513  if (active != NULL) {
514  return active;
515  }
516  }
517  return NULL;
518 }
519 
520 /*
521  * \brief Check whether an anonymous clone instance is known on a node
522  *
523  * \param[in] clone Anonymous clone to check
524  * \param[in] id Instance ID (without instance number) to check
525  * \param[in] node Node to check
526  *
527  * \return true if \p id instance of \p clone is known on \p node,
528  * otherwise false
529  */
// NOTE(review): listing line 541 (the remaining find_rsc() arguments) is
// absent from this rendering.
530 static bool
531 anonymous_known_on(const pcmk_resource_t *clone, const char *id,
532  const pcmk_node_t *node)
533 {
534  for (GList *iter = clone->children; iter; iter = iter->next) {
535  pcmk_resource_t *child = iter->data;
536 
537  /* Use ->find_rsc() because this might be a cloned group, and knowing
538  * that other members of the group are known here implies nothing.
539  */
540  child = clone->fns->find_rsc(child, id, NULL,
        // A failed lookup is logged as an assertion failure, then skipped
542  CRM_LOG_ASSERT(child != NULL);
543  if (child != NULL) {
            // known_on is keyed by node ID
544  if (g_hash_table_lookup(child->known_on, node->details->id)) {
545  return true;
546  }
547  }
548  }
549  return false;
550 }
551 
561 static bool
562 is_allowed(const pcmk_resource_t *rsc, const pcmk_node_t *node)
563 {
564  pcmk_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
565  node->details->id);
566 
567  return (allowed != NULL) && (allowed->weight >= 0);
568 }
569 
/*!
 * \internal
 * \brief Check whether an instance's promotion score on a node should count
 *
 * The first matching rule selects the reason; for all but the last, the score
 * counts only if is_allowed() is also true for \p rsc on \p node:
 * - "active": \p rsc is not unique and is the instance found active on
 *   \p node
 * - "probed": \p rsc is not unique, no instance is active on \p node, and
 *   anonymous_known_on() is true for \p node
 * - "none probed": \p rsc is running nowhere and known on no node
 * - "known": \p node is in \p rsc's known_on table or running_on list
 * - otherwise the score is ignored without consulting is_allowed()
 *
 * \param[in] rsc   Promotable clone instance to check
 * \param[in] node  Node to check
 *
 * \return true if the promotion score of \p rsc on \p node should be
 *         considered, otherwise false
 */
579 static bool
580 promotion_score_applies(const pcmk_resource_t *rsc, const pcmk_node_t *node)
581 {
    // id is allocated here and freed on every return path below
582  char *id = clone_strip(rsc->id);
583  const pcmk_resource_t *parent = pe__const_top_resource(rsc, false);
584  pcmk_resource_t *active = NULL;
585  const char *reason = "allowed";
586 
587  // Some checks apply only to anonymous clone instances
588  if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
589 
590  // If instance is active on the node, its score definitely applies
591  active = find_active_anon_instance(parent, id, node);
592  if (active == rsc) {
593  reason = "active";
594  goto check_allowed;
595  }
596 
597  /* If *no* instance is active on this node, this instance's score will
598  * count if it has been probed on this node.
599  */
600  if ((active == NULL) && anonymous_known_on(parent, id, node)) {
601  reason = "probed";
602  goto check_allowed;
603  }
604  }
605 
606  /* If this clone's status is unknown on *all* nodes (e.g. cluster startup),
607  * take all instances' scores into account, to make sure we use any
608  * permanent promotion scores.
609  */
610  if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) {
611  reason = "none probed";
612  goto check_allowed;
613  }
614 
615  /* Otherwise, we've probed and/or started the resource *somewhere*, so
616  * consider promotion scores on nodes where we know the status.
617  */
618  if ((g_hash_table_lookup(rsc->known_on, node->details->id) != NULL)
619  || (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) {
620  reason = "known";
621  } else {
622  pe_rsc_trace(rsc,
623  "Ignoring %s promotion score (for %s) on %s: not probed",
624  rsc->id, id, pe__node_name(node));
625  free(id);
626  return false;
627  }
628 
    // Reached by goto from the checks above, or by falling through as "known"
629 check_allowed:
630  if (is_allowed(rsc, node)) {
631  pe_rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s",
632  rsc->id, id, pe__node_name(node), reason);
633  free(id);
634  return true;
635  }
636 
637  pe_rsc_trace(rsc, "Ignoring %s promotion score (for %s) on %s: not allowed",
638  rsc->id, id, pe__node_name(node));
639  free(id);
640  return false;
641 }
642 
/*!
 * \internal
 * \brief Look up the promotion score node attribute for a resource name
 *
 * The attribute name is built with pcmk_promotion_score_name() from \p name,
 * looked up with pe__node_attribute_calculated(), and freed before returning.
 *
 * \param[in] rsc   Promotable clone instance the lookup is for
 * \param[in] node  Node whose attribute is read
 * \param[in] name  Resource name to build the attribute name from
 *
 * \return Value returned by pe__node_attribute_calculated() for the attribute
 *
 * NOTE(review): listing lines 659, 661 and 663 are absent from this
 * rendering; the declaration of `node_type` and the condition and body of the
 * "Not assigned yet" branch are therefore not visible here.
 */
653 static const char *
654 promotion_attr_value(const pcmk_resource_t *rsc, const pcmk_node_t *node,
655  const char *name)
656 {
657  char *attr_name = NULL;
658  const char *attr_value = NULL;
660 
662  // Not assigned yet
664  }
665  attr_name = pcmk_promotion_score_name(name);
666  attr_value = pe__node_attribute_calculated(node, attr_name, rsc, node_type,
667  false);
668  free(attr_name);
669  return attr_value;
670 }
671 
/*!
 * \internal
 * \brief Get the promotion score of a clone instance on a node
 *
 * For a resource with children, the result is the sum of the children's
 * promotion scores. Otherwise the score is read from the promotion score node
 * attribute, provided promotion_score_applies() is true.
 *
 * \param[in]  rsc         Promotable clone instance (or group member)
 * \param[in]  node        Node to get the score for
 * \param[out] is_default  If not NULL, set to true on entry, and to false once
 *                         an attribute value is found (for this resource or
 *                         any child)
 *
 * \return Score parsed by char2score() from the attribute value, or 0 if
 *         either argument is NULL, the score does not apply, or no attribute
 *         value is set
 */
682 static int
683 promotion_score(const pcmk_resource_t *rsc, const pcmk_node_t *node,
684  bool *is_default)
685 {
686  char *name = NULL;
687  const char *attr_value = NULL;
688 
689  if (is_default != NULL) {
690  *is_default = true;
691  }
692 
693  CRM_CHECK((rsc != NULL) && (node != NULL), return 0);
694 
695  /* If this is an instance of a cloned group, the promotion score is the sum
696  * of all members' promotion scores.
697  */
698  if (rsc->children != NULL) {
699  int score = 0;
700 
701  for (const GList *iter = rsc->children;
702  iter != NULL; iter = iter->next) {
703 
704  const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data;
705  bool child_default = false;
706  int child_score = promotion_score(child, node, &child_default);
707 
708  if (!child_default && (is_default != NULL)) {
709  *is_default = false;
710  }
            /* NOTE(review): plain addition here, whereas other score sums in
             * this file go through pcmk__add_scores() -- confirm that an
             * unclamped sum is intended.
             */
711  score += child_score;
712  }
713  return score;
714  }
715 
716  if (!promotion_score_applies(rsc, node)) {
717  return 0;
718  }
719 
720  /* For the promotion score attribute name, use the name the resource is
721  * known as in resource history, since that's what crm_attribute --promotion
722  * would have used.
723  */
    // name is borrowed from rsc here, so it must not be freed on this path
724  name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name;
725 
726  attr_value = promotion_attr_value(rsc, node, name);
727  if (attr_value != NULL) {
728  pe_rsc_trace(rsc, "Promotion score for %s on %s = %s",
729  name, pe__node_name(node), pcmk__s(attr_value, "(unset)"));
730  } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
731  /* If we don't have any resource history yet, we won't have clone_name.
732  * In that case, for anonymous clones, try the resource name without
733  * any instance number.
734  */
        // name is reassigned to the clone_strip() result, freed at the end of this branch
735  name = clone_strip(rsc->id);
736  if (strcmp(rsc->id, name) != 0) {
737  attr_value = promotion_attr_value(rsc, node, name);
738  pe_rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s",
739  name, pe__node_name(node), rsc->id,
740  pcmk__s(attr_value, "(unset)"));
741  }
742  free(name);
743  }
744 
745  if (attr_value == NULL) {
746  return 0;
747  }
748 
749  if (is_default != NULL) {
750  *is_default = false;
751  }
752  return char2score(attr_value);
753 }
754 
/*!
 * \internal
 * \brief Add promotion scores to instances' node scores and priorities
 *
 * For every child of \p rsc and every allowed node of that child that passes
 * pcmk__node_available(), a positive promotion_score() is added to the node's
 * weight, and the child's priority is raised to the score when the score is
 * greater.
 *
 * NOTE(review): listing lines 762 (function name and parameter list) and 765
 * (the rest of the pe__set_clone_flag() condition) are absent from this
 * rendering.
 */
761 void
763 {
764  if (pe__set_clone_flag(rsc,
766  return;
767  }
768 
769  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
770  pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
771 
        /* NOTE(review): this declaration shadows the GList loop variable
         * `iter` of the enclosing for statement. child_rsc is read before
         * the shadowing begins, and the for statement's own condition and
         * increment still use the outer variable, but renaming one of the two
         * would avoid -Wshadow warnings and reader confusion.
         */
772  GHashTableIter iter;
773  pcmk_node_t *node = NULL;
774  int score, new_score;
775 
776  g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
777  while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
778  if (!pcmk__node_available(node, false, false)) {
779  /* This node will never be promoted, so don't apply the
780  * promotion score, as that may lead to clone shuffling.
781  */
782  continue;
783  }
784 
785  score = promotion_score(child_rsc, node, NULL);
786  if (score > 0) {
787  new_score = pcmk__add_scores(node->weight, score);
788  if (new_score != node->weight) { // Could remain INFINITY
789  node->weight = new_score;
790  pe_rsc_trace(rsc,
791  "Added %s promotion priority (%s) to score "
792  "on %s (now %s)",
793  child_rsc->id, pcmk_readable_score(score),
794  pe__node_name(node),
795  pcmk_readable_score(new_score));
796  }
797  }
798 
799  if (score > child_rsc->priority) {
800  pe_rsc_trace(rsc,
801  "Updating %s priority to promotion score (%d->%d)",
802  child_rsc->id, child_rsc->priority, score);
803  child_rsc->priority = score;
804  }
805  }
806  }
807 }
808 
/*!
 * \internal
 * \brief Change a resource's current role from started to unpromoted
 *
 * Applied recursively to all of the resource's children via g_list_foreach().
 *
 * \param[in,out] data       Resource to update
 * \param[in]     user_data  Not used in the visible code
 *
 * NOTE(review): listing line 819 (presumably the declaration of `rsc` from
 * \p data) is absent from this rendering.
 */
816 static void
817 set_current_role_unpromoted(void *data, void *user_data)
818 {
820 
821  if (rsc->role == pcmk_role_started) {
822  // Promotable clones should use unpromoted role instead of started
823  rsc->role = pcmk_role_unpromoted;
824  }
825  g_list_foreach(rsc->children, set_current_role_unpromoted, NULL);
826 }
827 
/*!
 * \internal
 * \brief Set a resource's next role to unpromoted, or stopped if unassigned
 *
 * The next role becomes stopped when the resource's location() method yields
 * an empty node list, and unpromoted otherwise. Applied recursively to all of
 * the resource's children via g_list_foreach().
 *
 * \param[in,out] data       Resource to update
 * \param[in]     user_data  Not used in the visible code
 *
 * NOTE(review): listing line 838 (presumably the declaration of `rsc` from
 * \p data) is absent from this rendering.
 */
835 static void
836 set_next_role_unpromoted(void *data, void *user_data)
837 {
839  GList *assigned = NULL;
840 
    // Only whether the list is empty matters; the return value is ignored
841  rsc->fns->location(rsc, &assigned, FALSE);
842  if (assigned == NULL) {
843  pe__set_next_role(rsc, pcmk_role_stopped, "stopped instance");
844  } else {
845  pe__set_next_role(rsc, pcmk_role_unpromoted, "unpromoted instance");
846  g_list_free(assigned);
847  }
848  g_list_foreach(rsc->children, set_next_role_unpromoted, NULL);
849 }
850 
/*!
 * \internal
 * \brief Set a resource's next role to promoted if it is not yet decided
 *
 * Only a next role of pcmk_role_unknown is overwritten. Applied recursively
 * to all of the resource's children via g_list_foreach().
 *
 * \param[in,out] data       Resource to update
 * \param[in]     user_data  Not used in the visible code
 *
 * NOTE(review): listing line 861 (presumably the declaration of `rsc` from
 * \p data) is absent from this rendering.
 */
858 static void
859 set_next_role_promoted(void *data, gpointer user_data)
860 {
862 
863  if (rsc->next_role == pcmk_role_unknown) {
864  pe__set_next_role(rsc, pcmk_role_promoted, "promoted instance");
865  }
866  g_list_foreach(rsc->children, set_next_role_promoted, NULL);
867 }
868 
/*!
 * \internal
 * \brief Report an instance's promotion score
 *
 * Depending on a condition that is only partly visible here, the score is
 * either sent as a "promotion-score" message to the output object stored in
 * the scheduler's priv member, or logged at debug level.
 *
 * \param[in,out] instance  Promotable clone instance to report on
 *
 * NOTE(review): listing line 880 (the first part of the if condition) is
 * absent from this rendering.
 * NOTE(review): the debug message passes two pcmk_readable_score() results in
 * one call; that function is described as returning a static string, so both
 * fields may show the same text -- confirm.
 */
875 static void
876 show_promotion_score(pcmk_resource_t *instance)
877 {
878  pcmk_node_t *chosen = instance->fns->location(instance, NULL, FALSE);
879 
881  && !pcmk__is_daemon && (instance->cluster->priv != NULL)) {
882 
883  pcmk__output_t *out = instance->cluster->priv;
884 
885  out->message(out, "promotion-score", instance, chosen,
886  pcmk_readable_score(instance->sort_index));
887  } else {
888  pe_rsc_debug(pe__const_top_resource(instance, false),
889  "%s promotion score on %s: sort=%s priority=%s",
890  instance->id,
891  ((chosen == NULL)? "none" : pe__node_name(chosen)),
892  pcmk_readable_score(instance->sort_index),
893  pcmk_readable_score(instance->priority));
894  }
895 }
896 
/*!
 * \internal
 * \brief Set a clone instance's promotion priority and initial sort index
 *
 * Written as a g_list_foreach() callback over a clone's children. An instance
 * with no assigned node is left untouched apart from the role adjustment at
 * the top. Otherwise the priority is initialized from the instance's next
 * role, then adjusted by promoted-role location constraints and by the
 * instance's colocations. The sort index is set to the resulting priority, or
 * to INFINITY if the next role is already promoted.
 *
 * \param[in,out] data       Promotable clone instance (pcmk_resource_t *)
 * \param[in]     user_data  Clone that \p data belongs to (pcmk_resource_t *)
 *
 * NOTE(review): listing line 954 (presumably another case label sharing the
 * pcmk_role_stopped branch) is absent from this rendering.
 */
904 static void
905 set_instance_priority(gpointer data, gpointer user_data)
906 {
907  pcmk_resource_t *instance = (pcmk_resource_t *) data;
908  const pcmk_resource_t *clone = (const pcmk_resource_t *) user_data;
909  const pcmk_node_t *chosen = NULL;
910  enum rsc_role_e next_role = pcmk_role_unknown;
911  GList *list = NULL;
912 
913  pe_rsc_trace(clone, "Assigning priority for %s: %s", instance->id,
914  role2text(instance->next_role));
915 
916  if (instance->fns->state(instance, TRUE) == pcmk_role_started) {
917  set_current_role_unpromoted(instance, NULL);
918  }
919 
920  // Only an instance that will be active can be promoted
921  chosen = instance->fns->location(instance, &list, FALSE);
    // More than one node in the list is reported as a configuration error
922  if (pcmk__list_of_multiple(list)) {
923  pcmk__config_err("Cannot promote non-colocated child %s",
924  instance->id);
925  }
926  g_list_free(list);
927  if (chosen == NULL) {
928  return;
929  }
930 
931  next_role = instance->fns->state(instance, FALSE);
932  switch (next_role) {
933  case pcmk_role_started:
934  case pcmk_role_unknown:
935  // Set instance priority to its promotion score (or -1 if none)
936  {
937  bool is_default = false;
938 
939  instance->priority = promotion_score(instance, chosen,
940  &is_default);
941  if (is_default) {
942  /*
943  * Default to -1 if no value is set. This allows
944  * instances eligible for promotion to be specified
945  * based solely on rsc_location constraints, but
946  * prevents any instance from being promoted if neither
947  * a constraint nor a promotion score is present
948  */
949  instance->priority = -1;
950  }
951  }
952  break;
953 
955  case pcmk_role_stopped:
956  // Instance can't be promoted
957  instance->priority = -INFINITY;
958  break;
959 
960  case pcmk_role_promoted:
961  // Nothing needed (re-creating actions after scheduling fencing)
962  break;
963 
964  default:
965  CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s",
966  next_role, instance->id));
967  }
968 
969  // Add relevant location constraint scores for promoted role
970  apply_promoted_locations(instance, instance->rsc_location, chosen);
971  apply_promoted_locations(instance, clone->rsc_location, chosen);
972 
973  // Consider instance's role-based colocations with other resources
974  list = pcmk__this_with_colocations(instance);
975  for (GList *iter = list; iter != NULL; iter = iter->next) {
976  pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data;
977 
978  instance->cmds->apply_coloc_score(instance, cons->primary, cons, true);
979  }
980  g_list_free(list);
981 
982  instance->sort_index = instance->priority;
983  if (next_role == pcmk_role_promoted) {
984  instance->sort_index = INFINITY;
985  }
986  pe_rsc_trace(clone, "Assigning %s priority = %d",
987  instance->id, instance->priority);
988 }
989 
997 static void
998 set_instance_role(gpointer data, gpointer user_data)
999 {
1000  pcmk_resource_t *instance = (pcmk_resource_t *) data;
1001  int *count = (int *) user_data;
1002 
1003  const pcmk_resource_t *clone = pe__const_top_resource(instance, false);
1004  pcmk_node_t *chosen = NULL;
1005 
1006  show_promotion_score(instance);
1007 
1008  if (instance->sort_index < 0) {
1009  pe_rsc_trace(clone, "Not supposed to promote instance %s",
1010  instance->id);
1011 
1012  } else if ((*count < pe__clone_promoted_max(instance))
1013  || !pcmk_is_set(clone->flags, pcmk_rsc_managed)) {
1014  chosen = node_to_be_promoted_on(instance);
1015  }
1016 
1017  if (chosen == NULL) {
1018  set_next_role_unpromoted(instance, NULL);
1019  return;
1020  }
1021 
1022  if ((instance->role < pcmk_role_promoted)
1023  && !pcmk_is_set(instance->cluster->flags, pcmk_sched_quorate)
1024  && (instance->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
1025  crm_notice("Clone instance %s cannot be promoted without quorum",
1026  instance->id);
1027  set_next_role_unpromoted(instance, NULL);
1028  return;
1029  }
1030 
1031  chosen->count++;
1032  pe_rsc_info(clone, "Choosing %s (%s) on %s for promotion",
1033  instance->id, role2text(instance->role),
1034  pe__node_name(chosen));
1035  set_next_role_promoted(instance, NULL);
1036  (*count)++;
1037 }
1038 
/*!
 * \internal
 * \brief Choose which instances of a promotable clone will be promoted
 *
 * Resets the count of every allowed node of \p rsc, sets each instance's
 * priority, sorts the instances, and then walks them in order with
 * set_instance_role() to pick the ones to promote.
 *
 * NOTE(review): listing line 1046 (function name and parameter list) is
 * absent from this rendering.
 */
1045 void
1047 {
1048  int promoted = 0;
1049  GHashTableIter iter;
1050  pcmk_node_t *node = NULL;
1051 
1052  // Repurpose count to track the number of promoted instances assigned
1053  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
1054  while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
1055  node->count = 0;
1056  }
1057 
1058  // Set instances' promotion priorities and sort by highest priority first
1059  g_list_foreach(rsc->children, set_instance_priority, rsc);
1060  sort_promotable_instances(rsc);
1061 
1062  // Choose the first N eligible instances to be promoted
1063  g_list_foreach(rsc->children, set_instance_role, &promoted);
1064  pe_rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
1065  rsc->id, promoted, pe__clone_promoted_max(rsc));
1066 }
1067 
1077 static void
1078 create_promotable_instance_actions(pcmk_resource_t *clone,
1079  bool *any_promoting, bool *any_demoting)
1080 {
1081  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
1082  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1083 
1084  instance->cmds->create_actions(instance);
1085  check_for_role_change(instance, any_demoting, any_promoting);
1086  }
1087 }
1088 
1099 static void
1100 reset_instance_priorities(pcmk_resource_t *clone)
1101 {
1102  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
1103  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1104 
1105  instance->priority = clone->priority;
1106  }
1107 }
1108 
/*!
 * \internal
 * \brief Create all actions needed for a promotable clone
 *
 * Creates the per-instance actions, then the clone-level pseudo-actions, and
 * finally resets the instances' priorities to the clone's priority.
 *
 * NOTE(review): listing line 1116 (function name and parameter list) is
 * absent from this rendering.
 */
1115 void
1117 {
1118  bool any_promoting = false;
1119  bool any_demoting = false;
1120 
1121  // Create actions for each clone instance individually
1122  create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
1123 
1124  // Create pseudo-actions for clone as a whole
1125  pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
1126 
1127  // Undo our temporary repurposing of resource priority for instances
1128  reset_instance_priorities(clone);
1129 }
1130 
/*!
 * \internal
 * \brief Create orderings for the instances of a promotable clone
 *
 * For each instance, orders its demotion before its promotion and then
 * orders its promotion and demotion relative to the clone and to the
 * previously handled instance.
 *
 * NOTE(review): listing lines 1138 (function name and parameter list), 1142,
 * 1148 and 1150 are absent from this rendering, so the "demote before
 * promote" ordering call is only partly visible.
 */
1137 void
1139 {
1140  pcmk_resource_t *previous = NULL; // Needed for ordered clones
1141 
1143 
1144  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
1145  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1146 
1147  // Demote before promote
1149  instance, PCMK_ACTION_PROMOTE,
1151 
1152  order_instance_promotion(clone, instance, previous);
1153  order_instance_demotion(clone, instance, previous);
1154  previous = instance;
1155  }
1156 }
1157 
1167 static void
1168 update_dependent_allowed_nodes(pcmk_resource_t *dependent,
1169  const pcmk_resource_t *primary,
1170  const pcmk_node_t *primary_node,
1171  const pcmk__colocation_t *colocation)
1172 {
1173  GHashTableIter iter;
1174  pcmk_node_t *node = NULL;
1175  const char *primary_value = NULL;
1176  const char *attr = colocation->node_attribute;
1177 
1178  if (colocation->score >= INFINITY) {
1179  return; // Colocation is mandatory, so allowed node scores don't matter
1180  }
1181 
1182  primary_value = pcmk__colocation_node_attr(primary_node, attr, primary);
1183 
1184  pe_rsc_trace(colocation->primary,
1185  "Applying %s (%s with %s on %s by %s @%d) to %s",
1186  colocation->id, colocation->dependent->id,
1187  colocation->primary->id, pe__node_name(primary_node), attr,
1188  colocation->score, dependent->id);
1189 
1190  g_hash_table_iter_init(&iter, dependent->allowed_nodes);
1191  while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
1192  const char *dependent_value = pcmk__colocation_node_attr(node, attr,
1193  dependent);
1194 
1195  if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
1196  node->weight = pcmk__add_scores(node->weight, colocation->score);
1197  pe_rsc_trace(colocation->primary,
1198  "Added %s score (%s) to %s (now %s)",
1199  colocation->id, pcmk_readable_score(colocation->score),
1200  pe__node_name(node),
1201  pcmk_readable_score(node->weight));
1202  }
1203  }
1204 }
1205 
/*!
 * \internal
 * \brief Update a dependent's allowed nodes for a role-based colocation with
 *        a promotable clone
 *
 * Every instance of \p primary that has a location and whose next role equals
 * the colocation's primary role contributes its node: optional colocations
 * adjust the dependent's node scores per node, while mandatory ones restrict
 * the dependent to the collected nodes (except for promoted-with-promoted
 * colocations).
 *
 * \param[in,out] dependent   Dependent resource to update
 * \param[in]     colocation  Colocation constraint to apply
 *
 * NOTE(review): listing line 1214 (function name and first parameter,
 * referred to as `primary` in the body) is absent from this rendering.
 */
1213 void
1215  pcmk_resource_t *dependent,
1216  const pcmk__colocation_t *colocation)
1217 {
1218  GList *affected_nodes = NULL;
1219 
1220  /* Build a list of all nodes where an instance of the primary will be, and
1221  * (for optional colocations) update the dependent's allowed node scores for
1222  * each one.
1223  */
1224  for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
1225  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1226  pcmk_node_t *node = instance->fns->location(instance, NULL, FALSE);
1227 
1228  if (node == NULL) {
1229  continue;
1230  }
1231  if (instance->fns->state(instance, FALSE) == colocation->primary_role) {
1232  update_dependent_allowed_nodes(dependent, primary, node,
1233  colocation);
1234  affected_nodes = g_list_prepend(affected_nodes, node);
1235  }
1236  }
1237 
1238  /* For mandatory colocations, add the primary's node score to the
1239  * dependent's node score for each affected node, and ban the dependent
1240  * from all other nodes.
1241  *
1242  * However, skip this for promoted-with-promoted colocations, otherwise
1243  * inactive dependent instances can't start (in the unpromoted role).
1244  */
1245  if ((colocation->score >= INFINITY)
1246  && ((colocation->dependent_role != pcmk_role_promoted)
1247  || (colocation->primary_role != pcmk_role_promoted))) {
1248 
1249  pe_rsc_trace(colocation->primary,
1250  "Applying %s (mandatory %s with %s) to %s",
1251  colocation->id, colocation->dependent->id,
1252  colocation->primary->id, dependent->id);
1253  pcmk__colocation_intersect_nodes(dependent, primary, colocation,
1254  affected_nodes, true);
1255  }
    // Only the list container is freed; the nodes themselves are not
1256  g_list_free(affected_nodes);
1257 }
1258 
/*!
 * \internal
 * \brief Update a dependent's priority for a colocation with a promotable clone
 *
 * If pcmk__find_compatible_instance() finds an instance of \p primary in the
 * colocation's primary role for \p dependent, the colocation score is added
 * to the dependent's priority. Otherwise, for a mandatory colocation, the
 * dependent's priority is set to -INFINITY.
 *
 * \param[in,out] dependent   Dependent resource whose priority is updated
 * \param[in]     colocation  Colocation constraint to apply
 *
 * NOTE(review): listing line 1268 (function name and first parameter,
 * referred to as `primary` in the body) is absent from this rendering.
 * NOTE(review): the first trace call passes three pcmk_readable_score()
 * results at once; that function is described as returning a static string,
 * so all three fields may show the same text -- confirm.
 */
1267 void
1269  pcmk_resource_t *dependent,
1270  const pcmk__colocation_t *colocation)
1271 {
1272  pcmk_resource_t *primary_instance = NULL;
1273 
1274  // Look for a primary instance where dependent will be
1275  primary_instance = pcmk__find_compatible_instance(dependent, primary,
1276  colocation->primary_role,
1277  false);
1278 
1279  if (primary_instance != NULL) {
1280  // Add primary instance's priority to dependent's
1281  int new_priority = pcmk__add_scores(dependent->priority,
1282  colocation->score);
1283 
1284  pe_rsc_trace(colocation->primary,
1285  "Applying %s (%s with %s) to %s priority (%s + %s = %s)",
1286  colocation->id, colocation->dependent->id,
1287  colocation->primary->id, dependent->id,
1288  pcmk_readable_score(dependent->priority),
1289  pcmk_readable_score(colocation->score),
1290  pcmk_readable_score(new_priority));
1291  dependent->priority = new_priority;
1292 
1293  } else if (colocation->score >= INFINITY) {
1294  // Mandatory colocation, but primary won't be here
1295  pe_rsc_trace(colocation->primary,
1296  "Applying %s (%s with %s) to %s: can't be promoted",
1297  colocation->id, colocation->dependent->id,
1298  colocation->primary->id, dependent->id);
1299  dependent->priority = -INFINITY;
1300  }
1301 }
pcmk_assignment_methods_t * cmds
Resource assignment methods.
Definition: resources.h:417
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:962
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:238
enum rsc_role_e role_filter
Definition: internal.h:161
enum pe_quorum_policy no_quorum_policy
Response to loss of quorum.
Definition: scheduler.h:186
#define crm_notice(fmt, args...)
Definition: logging.h:383
GHashTable * known_on
Nodes where resource has been probed (key is node ID, not name)
Definition: resources.h:463
pcmk_scheduler_t * cluster
Cluster that resource is part of.
Definition: resources.h:412
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:36
node_type
Possible node types.
Definition: nodes.h:33
char data[0]
Definition: cpg.c:55
#define INFINITY
Definition: crm.h:98
Whether action should not be executed.
Definition: actions.h:244
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
List nodes where a resource (or any of its children) is.
Definition: resources.h:339
void(* create_actions)(pcmk_resource_t *rsc)
void pcmk__update_promotable_dependent_priority(const pcmk_resource_t *primary, pcmk_resource_t *dependent, const pcmk__colocation_t *colocation)
const char * pcmk_readable_score(int score)
Return a displayable static string for a score value.
Definition: scores.c:86
Stopped.
Definition: roles.h:29
const char * name
Definition: cib.c:26
int(* message)(pcmk__output_t *out, const char *message_id,...)
pcmk_resource_t rsc2
enum rsc_role_e role
Resource's current role.
Definition: resources.h:468
G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pcmk_resource_t *rsc)
Whether partition has quorum (via have-quorum property)
Definition: scheduler.h:71
GList * children
Resource's child resources, if any.
Definition: resources.h:475
int count
Counter reused by assignment and promotion code.
Definition: nodes.h:133
Match only clones and their instances, by either clone or instance ID.
Definition: resources.h:205
enum rsc_role_e(* state)(const pcmk_resource_t *rsc, gboolean current)
Get resource's current or assigned role.
Definition: resources.h:327
enum rsc_role_e next_role
Resource's scheduled next role.
Definition: resources.h:469
Implementation of pcmk_action_t.
Definition: actions.h:390
int char2score(const char *score)
Get the integer value of a score string.
Definition: scores.c:36
#define pcmk__config_err(fmt...)
Whether node scores should be output instead of logged.
Definition: scheduler.h:158
G_GNUC_INTERNAL pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current)
void(* add_colocated_node_scores)(pcmk_resource_t *source_rsc, const pcmk_resource_t *target_rsc, const char *log_id, GHashTable **nodes, const pcmk__colocation_t *colocation, float factor, uint32_t flags)
Promoted.
Definition: roles.h:32
char * pcmk_promotion_score_name(const char *rsc_id)
Return the name of the node attribute used as a promotion score.
Definition: attrs.c:80
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:222
Where resource is assigned.
int pe__set_clone_flag(pcmk_resource_t *clone, enum pcmk__clone_flags flag)
Definition: clone.c:1319
G_GNUC_INTERNAL bool pcmk__node_available(const pcmk_node_t *node, bool consider_score, bool consider_guest)
const char * action
Definition: pcmk_fence.c:30
#define pe__set_resource_flags(resource, flags_to_set)
Definition: internal.h:64
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Search for a resource ID in a resource and its children.
Definition: resources.h:287
const char * role2text(enum rsc_role_e role)
Definition: common.c:458
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
int weight
Node score for a given resource.
Definition: nodes.h:131
#define crm_warn(fmt, args...)
Definition: logging.h:382
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition: complex.c:1184
void pcmk__order_promotable_instances(pcmk_resource_t *clone)
Implementation of pcmk_resource_t.
Definition: resources.h:399
Actions are ordered (optionally, if no other flags are set)
pcmk_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition: status.c:448
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1865
int priority
Configured priority.
Definition: resources.h:422
const char * pe__node_attribute_calculated(const pcmk_node_t *node, const char *name, const pcmk_resource_t *rsc, enum pcmk__rsc_node node_type, bool force_host)
Definition: common.c:546
void * priv
For Pacemaker use only.
Definition: scheduler.h:229
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:99
void pcmk__create_promotable_actions(pcmk_resource_t *clone)
struct pe_node_shared_s * details
Basic node information.
Definition: nodes.h:134
void pe__create_promotable_pseudo_ops(pcmk_resource_t *clone, bool any_promoting, bool any_demoting)
Definition: clone.c:1362
G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b)
bool pcmk__is_daemon
Definition: logging.c:47
unsigned long long flags
Group of enum pcmk_rsc_flags.
Definition: resources.h:429
Unpromoted.
Definition: roles.h:31
rsc_role_e
Definition: roles.h:27
char * clone_name
Resource instance ID in history.
Definition: resources.h:401
GList * actions
Definition: resources.h:447
Implementation of pcmk_node_t.
Definition: nodes.h:130
void pcmk__set_instance_roles(pcmk_resource_t *rsc)
int pe__clone_promoted_node_max(const pcmk_resource_t *clone)
Definition: clone.c:114
int pe__clone_promoted_max(const pcmk_resource_t *clone)
Definition: clone.c:97
pcmk_resource_t rsc1
const char * id
Node ID at the cluster layer.
Definition: nodes.h:67
#define pcmk__order_resource_actions(first_rsc, first_task, then_rsc, then_task, flags)
void(* apply_coloc_score)(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent)
pcmk_resource_t * primary
Whether resource has not yet been assigned to a node.
Definition: resources.h:127
Whether resource is in the process of modifying allowed node scores.
Definition: resources.h:133
If 'then' is required, 'first' must be added to the transition graph.
G_GNUC_INTERNAL GList * pcmk__with_this_colocations(const pcmk_resource_t *rsc)
pcmk_rsc_methods_t * fns
Resource object methods.
Definition: resources.h:416
pcmk__rsc_node
int sort_index
Promotion score on assigned node.
Definition: resources.h:424
#define crm_err(fmt, args...)
Definition: logging.h:381
If matching by node, compare current node instead of assigned node.
Definition: resources.h:208
#define CRM_ASSERT(expr)
Definition: results.h:42
If 'first' is required and runnable, 'then' must be in graph.
G_GNUC_INTERNAL GList * pcmk__this_with_colocations(const pcmk_resource_t *rsc)
Started.
Definition: roles.h:30
This structure contains everything that makes up a single output formatter.
GList * rsc_location
Definition: resources.h:446
void pcmk__add_promotion_scores(pcmk_resource_t *rsc)
G_GNUC_INTERNAL void pcmk__promotable_restart_ordering(pcmk_resource_t *rsc)
#define pe__clear_resource_flags(resource, flags_to_clear)
Definition: internal.h:70
#define PCMK_ACTION_PROMOTE
Definition: actions.h:65
pcmk_resource_t * dependent
#define pe__show_node_scores(level, rsc, text, nodes, scheduler)
Definition: internal.h:341
GList * running_on
Nodes where resource may be active.
Definition: resources.h:460
const char * node_attribute
int pcmk__add_scores(int score1, int score2)
Definition: scores.c:116
#define PCMK_ACTION_PROMOTED
Definition: actions.h:66
Resource role is unknown.
Definition: roles.h:28
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:37
bool pe__clone_is_ordered(const pcmk_resource_t *clone)
Definition: clone.c:1300
unsigned long long flags
Group of enum pcmk_scheduler_flags.
Definition: scheduler.h:183
const char * parent
Definition: cib.c:27
#define PCMK_ACTION_DEMOTED
Definition: actions.h:50
Whether resource is managed.
Definition: resources.h:106
G_GNUC_INTERNAL void pcmk__colocation_intersect_nodes(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, const GList *primary_nodes, bool merge_scores)
uint64_t flags
Definition: remote.c:215
Whether resource is not an anonymous clone instance.
Definition: resources.h:118
void pcmk__update_dependent_with_promotable(const pcmk_resource_t *primary, pcmk_resource_t *dependent, const pcmk__colocation_t *colocation)
Update dependent for a colocation with a promotable clone.
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:35
char * id
Resource ID in configuration.
Definition: resources.h:400
GHashTable * allowed_nodes
Nodes where resource may run (key is node ID, not name)
Definition: resources.h:466
Where resource is running.