pacemaker  2.1.9-49aab99839
Scalable High-Availability cluster resource manager
pcmk_sched_promotable.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <crm/common/xml.h>
13 #include <pacemaker-internal.h>
14 
15 #include "libpacemaker_private.h"
16 
/*!
 * \internal
 * \brief Order a clone instance's promotion relative to its clone, and (for
 *        an ordered clone) relative to the previously handled instance
 *
 * \param clone  Clone that \p child belongs to
 * \param child  Clone instance whose promotion is being ordered
 * \param last   Previously handled instance (ignored if NULL)
 *
 * NOTE(review): this listing skips embedded lines 30, 32-33, 35, 39 and 41,
 * so only some argument lines of the ordering calls are visible below. Confirm
 * the callee and its flags against the original file before changing anything.
 */
25 static void
26 order_instance_promotion(pcmk_resource_t *clone, pcmk_resource_t *child,
27  pcmk_resource_t *last)
28 {
29  // "Promote clone" -> promote instance -> "clone promoted"
31  child, PCMK_ACTION_PROMOTE,
34  clone, PCMK_ACTION_PROMOTED,
36 
37  // If clone is ordered, order this instance relative to last
38  if ((last != NULL) && pe__clone_is_ordered(clone)) {
40  child, PCMK_ACTION_PROMOTE,
42  }
43 }
44 
/*!
 * \internal
 * \brief Order a clone instance's demotion relative to its clone, and (for
 *        an ordered clone) relative to the previously handled instance
 *
 * \param clone  Clone that \p child belongs to
 * \param child  Clone instance whose demotion is being ordered
 * \param last   Previously handled instance (ignored if NULL)
 *
 * NOTE(review): this listing skips embedded lines 58-61, 63 and 67-68, so the
 * ordering calls themselves are almost entirely missing here. Confirm against
 * the original file before changing anything.
 */
53 static void
54 order_instance_demotion(pcmk_resource_t *clone, pcmk_resource_t *child,
55  pcmk_resource_t *last)
56 {
57  // "Demote clone" -> demote instance -> "clone demoted"
62  clone, PCMK_ACTION_DEMOTED,
64 
65  // If clone is ordered, order this instance relative to last
66  if ((last != NULL) && pe__clone_is_ordered(clone)) {
69  }
70 }
71 
80 static void
81 check_for_role_change(const pcmk_resource_t *rsc, bool *demoting,
82  bool *promoting)
83 {
84  const GList *iter = NULL;
85 
86  // If this is a cloned group, check group members recursively
87  if (rsc->children != NULL) {
88  for (iter = rsc->children; iter != NULL; iter = iter->next) {
89  check_for_role_change((const pcmk_resource_t *) iter->data,
90  demoting, promoting);
91  }
92  return;
93  }
94 
95  for (iter = rsc->actions; iter != NULL; iter = iter->next) {
96  const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
97 
98  if (*promoting && *demoting) {
99  return;
100 
101  } else if (pcmk_is_set(action->flags, pcmk_action_optional)) {
102  continue;
103 
104  } else if (pcmk__str_eq(PCMK_ACTION_DEMOTE, action->task,
105  pcmk__str_none)) {
106  *demoting = true;
107 
108  } else if (pcmk__str_eq(PCMK_ACTION_PROMOTE, action->task,
109  pcmk__str_none)) {
110  *promoting = true;
111  }
112  }
113 }
114 
127 static void
128 apply_promoted_locations(pcmk_resource_t *child,
129  const GList *location_constraints,
130  const pcmk_node_t *chosen)
131 {
132  for (const GList *iter = location_constraints; iter; iter = iter->next) {
133  const pcmk__location_t *location = iter->data;
134  const pcmk_node_t *constraint_node = NULL;
135 
136  if (location->role_filter == pcmk_role_promoted) {
137  constraint_node = pe_find_node_id(location->nodes,
138  chosen->details->id);
139  }
140  if (constraint_node != NULL) {
141  int new_priority = pcmk__add_scores(child->priority,
142  constraint_node->weight);
143 
144  pcmk__rsc_trace(child,
145  "Applying location %s to %s promotion priority on "
146  "%s: %s + %s = %s",
147  location->id, child->id,
148  pcmk__node_name(constraint_node),
150  pcmk_readable_score(constraint_node->weight),
151  pcmk_readable_score(new_priority));
152  child->priority = new_priority;
153  }
154  }
155 }
156 
165 static pcmk_node_t *
166 node_to_be_promoted_on(const pcmk_resource_t *rsc)
167 {
168  pcmk_node_t *node = NULL;
169  pcmk_node_t *local_node = NULL;
170  const pcmk_resource_t *parent = NULL;
171 
172  // If this is a cloned group, bail if any group member can't be promoted
173  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
174  pcmk_resource_t *child = (pcmk_resource_t *) iter->data;
175 
176  if (node_to_be_promoted_on(child) == NULL) {
177  pcmk__rsc_trace(rsc,
178  "%s can't be promoted because member %s can't",
179  rsc->id, child->id);
180  return NULL;
181  }
182  }
183 
184  node = rsc->fns->location(rsc, NULL, FALSE);
185  if (node == NULL) {
186  pcmk__rsc_trace(rsc, "%s can't be promoted because it won't be active",
187  rsc->id);
188  return NULL;
189 
190  } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
191  if (rsc->fns->state(rsc, TRUE) == pcmk_role_promoted) {
192  crm_notice("Unmanaged instance %s will be left promoted on %s",
193  rsc->id, pcmk__node_name(node));
194  } else {
195  pcmk__rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
196  rsc->id);
197  return NULL;
198  }
199 
200  } else if (rsc->priority < 0) {
201  pcmk__rsc_trace(rsc,
202  "%s can't be promoted because its promotion priority "
203  "%d is negative",
204  rsc->id, rsc->priority);
205  return NULL;
206 
207  } else if (!pcmk__node_available(node, false, true)) {
208  pcmk__rsc_trace(rsc,
209  "%s can't be promoted because %s can't run resources",
210  rsc->id, pcmk__node_name(node));
211  return NULL;
212  }
213 
214  parent = pe__const_top_resource(rsc, false);
215  local_node = g_hash_table_lookup(parent->allowed_nodes, node->details->id);
216 
217  if (local_node == NULL) {
218  /* It should not be possible for the scheduler to have assigned the
219  * instance to a node where its parent is not allowed, but it's good to
220  * have a fail-safe.
221  */
222  if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
223  pcmk__sched_err("%s can't be promoted because %s is not allowed "
224  "on %s (scheduler bug?)",
225  rsc->id, parent->id, pcmk__node_name(node));
226  } // else the instance is unmanaged and already promoted
227  return NULL;
228 
229  } else if ((local_node->count >= pe__clone_promoted_node_max(parent))
230  && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
231  pcmk__rsc_trace(rsc,
232  "%s can't be promoted because %s has "
233  "maximum promoted instances already",
234  rsc->id, pcmk__node_name(node));
235  return NULL;
236  }
237 
238  return local_node;
239 }
240 
/*!
 * \internal
 * \brief Compare two promotable clone instances by promotion priority
 *
 * Comparison keys, in order: sort index (higher first), current role as
 * returned by the state method (higher first), then pcmk__cmp_instance().
 *
 * \param[in] a  First instance to compare (must not be NULL)
 * \param[in] b  Second instance to compare (must not be NULL)
 *
 * \return -1 if \p a sorts first, 1 if \p b sorts first, otherwise the result
 *         of pcmk__cmp_instance()
 *
 * NOTE(review): this listing skips embedded lines 265, 268, 271, 274, 282 and
 * 288, so the heads (and some arguments) of the trace calls that own the
 * string literals below are missing. Confirm against the original file.
 */
252 static gint
253 cmp_promotable_instance(gconstpointer a, gconstpointer b)
254 {
255  const pcmk_resource_t *rsc1 = (const pcmk_resource_t *) a;
256  const pcmk_resource_t *rsc2 = (const pcmk_resource_t *) b;
257 
258  enum rsc_role_e role1 = pcmk_role_unknown;
259  enum rsc_role_e role2 = pcmk_role_unknown;
260 
261  pcmk__assert((rsc1 != NULL) && (rsc2 != NULL));
262 
263  // Check sort index set by pcmk__set_instance_roles()
264  if (rsc1->sort_index > rsc2->sort_index) {
266  "%s has higher promotion priority than %s "
267  "(sort index %d > %d)",
269  return -1;
270  } else if (rsc1->sort_index < rsc2->sort_index) {
272  "%s has lower promotion priority than %s "
273  "(sort index %d < %d)",
275  return 1;
276  }
277 
278  // If those are the same, prefer instance whose current role is higher
279  role1 = rsc1->fns->state(rsc1, TRUE);
280  role2 = rsc2->fns->state(rsc2, TRUE);
281  if (role1 > role2) {
283  "%s has higher promotion priority than %s "
284  "(higher current role)",
285  rsc1->id, rsc2->id);
286  return -1;
287  } else if (role1 < role2) {
289  "%s has lower promotion priority than %s "
290  "(lower current role)",
291  rsc1->id, rsc2->id);
292  return 1;
293  }
294 
295  // Finally, do normal clone instance sorting
296  return pcmk__cmp_instance(a, b);
297 }
298 
310 static void
311 add_sort_index_to_node_score(gpointer data, gpointer user_data)
312 {
313  const pcmk_resource_t *child = (const pcmk_resource_t *) data;
314  pcmk_resource_t *clone = (pcmk_resource_t *) user_data;
315 
316  pcmk_node_t *node = NULL;
317  const pcmk_node_t *chosen = NULL;
318 
319  if (child->sort_index < 0) {
320  pcmk__rsc_trace(clone, "Not adding sort index of %s: negative",
321  child->id);
322  return;
323  }
324 
325  chosen = child->fns->location(child, NULL, FALSE);
326  if (chosen == NULL) {
327  pcmk__rsc_trace(clone, "Not adding sort index of %s: inactive",
328  child->id);
329  return;
330  }
331 
332  node = g_hash_table_lookup(clone->allowed_nodes, chosen->details->id);
333  pcmk__assert(node != NULL);
334 
335  node->weight = pcmk__add_scores(child->sort_index, node->weight);
336  pcmk__rsc_trace(clone,
337  "Added cumulative priority of %s (%s) to score on %s "
338  "(now %s)",
339  child->id, pcmk_readable_score(child->sort_index),
340  pcmk__node_name(node), pcmk_readable_score(node->weight));
341 }
342 
/*!
 * \internal
 * \brief Apply a colocation's dependent node scores to a promotable clone's
 *        allowed nodes, if the colocation is with the promoted role
 *
 * Nothing is done unless the colocation's primary role is the promoted role
 * and pcmk__colocation_has_influence() returns true for it. Intended for use
 * as a g_list_foreach() callback.
 *
 * \param[in,out] data       Colocation to apply
 * \param[in,out] user_data  Promotable clone that is the colocation's primary
 *
 * NOTE(review): this listing skips embedded line 358, so the initializer of
 * \c flags is truncated below (no terminating operand or semicolon is
 * visible). Confirm the full flag set against the original file.
 */
350 static void
351 apply_coloc_to_primary(gpointer data, gpointer user_data)
352 {
353  pcmk__colocation_t *colocation = data;
354  pcmk_resource_t *clone = user_data;
355  pcmk_resource_t *dependent = colocation->dependent;
    // Scale factor passed to the dependent: score as a fraction of INFINITY
356  const float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
357  const uint32_t flags = pcmk__coloc_select_active
359 
360  if ((colocation->primary_role != pcmk_role_promoted)
361  || !pcmk__colocation_has_influence(colocation, NULL)) {
362  return;
363  }
364 
365  pcmk__rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s",
366  colocation->id, colocation->dependent->id,
367  colocation->primary->id,
368  pcmk_readable_score(colocation->score));
369  dependent->cmds->add_colocated_node_scores(dependent, clone, clone->id,
370  &clone->allowed_nodes,
371  colocation, factor, flags);
372 }
373 
/*!
 * \internal
 * \brief Set a clone instance's final sort index from its clone's node score
 *
 * In the last branch, the sort index becomes the clone's score for the node
 * the instance is assigned to. Intended for use as a g_list_foreach()
 * callback.
 *
 * \param[in,out] data       Clone instance to update
 * \param[in]     user_data  Clone that the instance belongs to
 *
 * NOTE(review): this listing skips embedded lines 391, 398 and 402. From the
 * trace messages, 391 and 398 presumably assign the sort index in the first
 * two branches, and 402 holds the arguments of the second trace call. Confirm
 * against the original file.
 */
381 static void
382 set_sort_index_to_node_score(gpointer data, gpointer user_data)
383 {
384  pcmk_resource_t *child = (pcmk_resource_t *) data;
385  const pcmk_resource_t *clone = (const pcmk_resource_t *) user_data;
386 
387  pcmk_node_t *chosen = child->fns->location(child, NULL, FALSE);
388 
389  if (!pcmk_is_set(child->flags, pcmk_rsc_managed)
390  && (child->next_role == pcmk_role_promoted)) {
392  pcmk__rsc_trace(clone,
393  "Final sort index for %s is INFINITY "
394  "(unmanaged promoted)",
395  child->id);
396 
397  } else if (chosen == NULL) {
399  pcmk__rsc_trace(clone,
400  "Final promotion priority for %s is %s "
401  "(will not be active)",
403 
404  } else if (child->sort_index < 0) {
405  pcmk__rsc_trace(clone,
406  "Final sort index for %s is %d (ignoring node score)",
407  child->id, child->sort_index);
408 
409  } else {
410  const pcmk_node_t *node = g_hash_table_lookup(clone->allowed_nodes,
411  chosen->details->id);
412 
413  pcmk__assert(node != NULL);
414  child->sort_index = node->weight;
415  pcmk__rsc_trace(clone,
416  "Adding scores for %s: final sort index for %s is %d",
417  clone->id, child->id, child->sort_index);
418  }
419 }
420 
/*!
 * \internal
 * \brief Sort a promotable clone's instances by descending promotion priority
 *
 * Visible steps: add each instance's sort index to the clone's node scores,
 * apply the clone's "with this" colocations, reset each instance's sort index
 * from the resulting node scores, then sort the children with
 * cmp_promotable_instance().
 *
 * \param[in,out] clone  Promotable clone whose children are sorted
 *
 * NOTE(review): this listing skips embedded lines 432, 436, 456 and 466. The
 * head of the early-return condition and the statement under the "ticket"
 * comment are therefore missing. Confirm against the original file.
 */
427 static void
428 sort_promotable_instances(pcmk_resource_t *clone)
429 {
430  GList *colocations = NULL;
431 
433  == pcmk_rc_already) {
434  return;
435  }
437 
438  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
439  pcmk_resource_t *child = (pcmk_resource_t *) iter->data;
440 
441  pcmk__rsc_trace(clone,
442  "Adding scores for %s: initial sort index for %s is %d",
443  clone->id, child->id, child->sort_index);
444  }
445  pe__show_node_scores(true, clone, "Before", clone->allowed_nodes,
446  clone->cluster);
447 
448  g_list_foreach(clone->children, add_sort_index_to_node_score, clone);
449 
450  // "this with" colocations were already applied via set_instance_priority()
451  colocations = pcmk__with_this_colocations(clone);
452  g_list_foreach(colocations, apply_coloc_to_primary, clone);
453  g_list_free(colocations);
454 
455  // Ban resource from all nodes if it needs a ticket but doesn't have it
457 
458  pe__show_node_scores(true, clone, "After", clone->allowed_nodes,
459  clone->cluster);
460 
461  // Reset sort indexes to final node scores
462  g_list_foreach(clone->children, set_sort_index_to_node_score, clone);
463 
464  // Finally, sort instances in descending order of promotion priority
465  clone->children = g_list_sort(clone->children, cmp_promotable_instance);
467 }
468 
/*!
 * \internal
 * \brief Search an anonymous clone's children for an instance matching an ID
 *        on a node
 *
 * \param[in] clone  Anonymous clone whose children are searched
 * \param[in] id     Instance ID to search for
 * \param[in] node   Node passed to the find_rsc method
 *
 * \return First non-NULL result of the find_rsc method over the children,
 *         or NULL if none matches
 *
 * NOTE(review): this listing skips embedded lines 489-490, so the search flags
 * passed to find_rsc() are missing below. The function name suggests the
 * search is limited to active instances, but that is not visible here.
 */
479 static pcmk_resource_t *
480 find_active_anon_instance(const pcmk_resource_t *clone, const char *id,
481  const pcmk_node_t *node)
482 {
483  for (GList *iter = clone->children; iter; iter = iter->next) {
484  pcmk_resource_t *child = iter->data;
485  pcmk_resource_t *active = NULL;
486 
487  // Use ->find_rsc() in case this is a cloned group
488  active = clone->fns->find_rsc(child, id, node,
491  if (active != NULL) {
492  return active;
493  }
494  }
495  return NULL;
496 }
497 
498 /*
499  * \brief Check whether an anonymous clone instance is known on a node
500  *
501  * \param[in] clone Anonymous clone to check
502  * \param[in] id Instance ID (without instance number) to check
503  * \param[in] node Node to check
504  *
505  * \return true if \p id instance of \p clone is known on \p node,
506  * otherwise false
507  */
508 static bool
509 anonymous_known_on(const pcmk_resource_t *clone, const char *id,
510  const pcmk_node_t *node)
511 {
512  for (GList *iter = clone->children; iter; iter = iter->next) {
513  pcmk_resource_t *child = iter->data;
514 
515  /* Use ->find_rsc() because this might be a cloned group, and knowing
516  * that other members of the group are known here implies nothing.
517  */
    /* NOTE(review): this listing skips embedded line 519, so the final
     * (flags) argument of the find_rsc() call below is missing. Confirm
     * against the original file.
     */
518  child = clone->fns->find_rsc(child, id, NULL,
    // A failed lookup is logged as an assertion but otherwise tolerated
520  CRM_LOG_ASSERT(child != NULL);
521  if (child != NULL) {
522  if (g_hash_table_lookup(child->known_on, node->details->id)) {
523  return true;
524  }
525  }
526  }
527  return false;
528 }
529 
539 static bool
540 is_allowed(const pcmk_resource_t *rsc, const pcmk_node_t *node)
541 {
542  pcmk_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
543  node->details->id);
544 
545  return (allowed != NULL) && (allowed->weight >= 0);
546 }
547 
557 static bool
558 promotion_score_applies(const pcmk_resource_t *rsc, const pcmk_node_t *node)
559 {
560  char *id = clone_strip(rsc->id);
561  const pcmk_resource_t *parent = pe__const_top_resource(rsc, false);
562  pcmk_resource_t *active = NULL;
563  const char *reason = "allowed";
564 
565  // Some checks apply only to anonymous clone instances
566  if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
567 
568  // If instance is active on the node, its score definitely applies
569  active = find_active_anon_instance(parent, id, node);
570  if (active == rsc) {
571  reason = "active";
572  goto check_allowed;
573  }
574 
575  /* If *no* instance is active on this node, this instance's score will
576  * count if it has been probed on this node.
577  */
578  if ((active == NULL) && anonymous_known_on(parent, id, node)) {
579  reason = "probed";
580  goto check_allowed;
581  }
582  }
583 
584  /* If this clone's status is unknown on *all* nodes (e.g. cluster startup),
585  * take all instances' scores into account, to make sure we use any
586  * permanent promotion scores.
587  */
588  if ((rsc->running_on == NULL) && (g_hash_table_size(rsc->known_on) == 0)) {
589  reason = "none probed";
590  goto check_allowed;
591  }
592 
593  /* Otherwise, we've probed and/or started the resource *somewhere*, so
594  * consider promotion scores on nodes where we know the status.
595  */
596  if ((g_hash_table_lookup(rsc->known_on, node->details->id) != NULL)
597  || (pe_find_node_id(rsc->running_on, node->details->id) != NULL)) {
598  reason = "known";
599  } else {
600  pcmk__rsc_trace(rsc,
601  "Ignoring %s promotion score (for %s) on %s: "
602  "not probed",
603  rsc->id, id, pcmk__node_name(node));
604  free(id);
605  return false;
606  }
607 
608 check_allowed:
609  if (is_allowed(rsc, node)) {
610  pcmk__rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s",
611  rsc->id, id, pcmk__node_name(node), reason);
612  free(id);
613  return true;
614  }
615 
616  pcmk__rsc_trace(rsc,
617  "Ignoring %s promotion score (for %s) on %s: not allowed",
618  rsc->id, id, pcmk__node_name(node));
619  free(id);
620  return false;
621 }
622 
/*!
 * \internal
 * \brief Look up the value of a promotion score node attribute
 *
 * The attribute name is built from \p name with pcmk_promotion_score_name()
 * and looked up with pcmk__node_attr().
 *
 * \param[in] rsc   Promotable clone instance the score is for
 * \param[in] node  Node whose attributes are consulted
 * \param[in] name  Resource name to build the attribute name from
 *
 * \return Value returned by pcmk__node_attr() for the attribute (the caller
 *         must not free it; ownership is not transferred here)
 *
 * NOTE(review): this listing skips embedded lines 640, 642, 644 and 647. The
 * declaration of \c node_type, the condition and body of the "Not assigned
 * yet" branch, and the key passed to g_hash_table_lookup() are all missing
 * below. Confirm against the original file.
 */
633 static const char *
634 promotion_attr_value(const pcmk_resource_t *rsc, const pcmk_node_t *node,
635  const char *name)
636 {
637  char *attr_name = NULL;
638  const char *attr_value = NULL;
639  const char *target = NULL;
641 
643  // Not assigned yet
645  }
646  target = g_hash_table_lookup(rsc->meta,
648  attr_name = pcmk_promotion_score_name(name);
649  attr_value = pcmk__node_attr(node, attr_name, target, node_type);
650  free(attr_name);
651  return attr_value;
652 }
653 
664 static int
665 promotion_score(const pcmk_resource_t *rsc, const pcmk_node_t *node,
666  bool *is_default)
667 {
668  int score = 0;
669  int rc = pcmk_rc_ok;
670  char *name = NULL;
671  const char *attr_value = NULL;
672 
673  if (is_default != NULL) {
674  *is_default = true;
675  }
676 
677  CRM_CHECK((rsc != NULL) && (node != NULL), return 0);
678 
679  /* If this is an instance of a cloned group, the promotion score is the sum
680  * of all members' promotion scores.
681  */
682  if (rsc->children != NULL) {
683  int score = 0;
684 
685  for (const GList *iter = rsc->children;
686  iter != NULL; iter = iter->next) {
687 
688  const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data;
689  bool child_default = false;
690  int child_score = promotion_score(child, node, &child_default);
691 
692  if (!child_default && (is_default != NULL)) {
693  *is_default = false;
694  }
695  score += child_score;
696  }
697  return score;
698  }
699 
700  if (!promotion_score_applies(rsc, node)) {
701  return 0;
702  }
703 
704  /* For the promotion score attribute name, use the name the resource is
705  * known as in resource history, since that's what crm_attribute --promotion
706  * would have used.
707  */
708  name = (rsc->clone_name == NULL)? rsc->id : rsc->clone_name;
709 
710  attr_value = promotion_attr_value(rsc, node, name);
711  if (attr_value != NULL) {
712  pcmk__rsc_trace(rsc, "Promotion score for %s on %s = %s",
713  name, pcmk__node_name(node),
714  pcmk__s(attr_value, "(unset)"));
715  } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
716  /* If we don't have any resource history yet, we won't have clone_name.
717  * In that case, for anonymous clones, try the resource name without
718  * any instance number.
719  */
720  name = clone_strip(rsc->id);
721  if (strcmp(rsc->id, name) != 0) {
722  attr_value = promotion_attr_value(rsc, node, name);
723  pcmk__rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s",
724  name, pcmk__node_name(node), rsc->id,
725  pcmk__s(attr_value, "(unset)"));
726  }
727  free(name);
728  }
729 
730  if (attr_value == NULL) {
731  return 0;
732  }
733 
734  if (is_default != NULL) {
735  *is_default = false;
736  }
737 
738  rc = pcmk_parse_score(attr_value, &score, 0);
739  if (rc != pcmk_rc_ok) {
740  crm_warn("Using 0 as promotion score for %s on %s "
741  "because '%s' is not a valid score",
742  rsc->id, pcmk__node_name(node), attr_value);
743  }
744  return score;
745 }
746 
/*!
 * \internal
 * \brief Add each child instance's promotion score to its allowed node scores
 *        and raise the child's priority to the highest score seen
 *
 * For every child and every node in the child's allowed nodes for which
 * pcmk__node_available() is true, the child's promotion score on that node is
 * added to the node's score if positive. The child's priority is raised to the
 * score whenever the score is higher.
 *
 * NOTE(review): this listing skips embedded lines 754 and 757, so the
 * declarator (function name and parameter list) and the flag argument of
 * pe__set_clone_flag() are missing below. The body refers to the parameter as
 * \c rsc. Confirm against the original file.
 */
753 void
755 {
756  if (pe__set_clone_flag(rsc,
758  return;
759  }
760 
761  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
762  pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
763 
    /* NOTE(review): this hash table iterator shadows the GList iterator of
     * the enclosing for loop (same name, inner scope). The loop's own
     * "iter = iter->next" still refers to the outer variable.
     */
764  GHashTableIter iter;
765  pcmk_node_t *node = NULL;
766  int score, new_score;
767 
768  g_hash_table_iter_init(&iter, child_rsc->allowed_nodes);
769  while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
770  if (!pcmk__node_available(node, false, false)) {
771  /* This node will never be promoted, so don't apply the
772  * promotion score, as that may lead to clone shuffling.
773  */
774  continue;
775  }
776 
777  score = promotion_score(child_rsc, node, NULL);
778  if (score > 0) {
779  new_score = pcmk__add_scores(node->weight, score);
780  if (new_score != node->weight) { // Could remain INFINITY
781  node->weight = new_score;
782  pcmk__rsc_trace(rsc,
783  "Added %s promotion priority (%s) to score "
784  "on %s (now %s)",
785  child_rsc->id, pcmk_readable_score(score),
786  pcmk__node_name(node),
787  pcmk_readable_score(new_score));
788  }
789  }
790 
791  if (score > child_rsc->priority) {
792  pcmk__rsc_trace(rsc,
793  "Updating %s priority to promotion score "
794  "(%d->%d)",
795  child_rsc->id, child_rsc->priority, score);
796  child_rsc->priority = score;
797  }
798  }
799  }
800 }
801 
809 static void
810 set_current_role_unpromoted(void *data, void *user_data)
811 {
813 
814  if (rsc->role == pcmk_role_started) {
815  // Promotable clones should use unpromoted role instead of started
816  rsc->role = pcmk_role_unpromoted;
817  }
818  g_list_foreach(rsc->children, set_current_role_unpromoted, NULL);
819 }
820 
828 static void
829 set_next_role_unpromoted(void *data, void *user_data)
830 {
832  GList *assigned = NULL;
833 
834  rsc->fns->location(rsc, &assigned, FALSE);
835  if (assigned == NULL) {
836  pe__set_next_role(rsc, pcmk_role_stopped, "stopped instance");
837  } else {
838  pe__set_next_role(rsc, pcmk_role_unpromoted, "unpromoted instance");
839  g_list_free(assigned);
840  }
841  g_list_foreach(rsc->children, set_next_role_unpromoted, NULL);
842 }
843 
851 static void
852 set_next_role_promoted(void *data, gpointer user_data)
853 {
855 
856  if (rsc->next_role == pcmk_role_unknown) {
857  pe__set_next_role(rsc, pcmk_role_promoted, "promoted instance");
858  }
859  g_list_foreach(rsc->children, set_next_role_promoted, NULL);
860 }
861 
/*!
 * \internal
 * \brief Show a clone instance's promotion score, either through the
 *        scheduler's output object or as a debug log message
 *
 * \param[in,out] instance  Promotable clone instance to show
 *
 * NOTE(review): this listing skips embedded line 873, so the first half of
 * the condition that selects formatted output is missing below. Confirm
 * against the original file.
 */
868 static void
869 show_promotion_score(pcmk_resource_t *instance)
870 {
871  pcmk_node_t *chosen = instance->fns->location(instance, NULL, FALSE);
872 
874  && !pcmk__is_daemon && (instance->cluster->priv != NULL)) {
875 
    // The scheduler's private data is used as the output object here
876  pcmk__output_t *out = instance->cluster->priv;
877 
878  out->message(out, "promotion-score", instance, chosen,
879  pcmk_readable_score(instance->sort_index));
880 
881  } else if (chosen == NULL) {
882  pcmk__rsc_debug(pe__const_top_resource(instance, false),
883  "%s promotion score (inactive): %s (priority=%d)",
884  instance->id, pcmk_readable_score(instance->sort_index), instance->priority);
885 
886  } else {
887  pcmk__rsc_debug(pe__const_top_resource(instance, false),
888  "%s promotion score on %s: %s (priority=%d)",
889  instance->id, pcmk__node_name(chosen),
890  pcmk_readable_score(instance->sort_index),
891  instance->priority);
892  }
893 }
894 
/*!
 * \internal
 * \brief Set a clone instance's promotion priority and initial sort index
 *
 * The priority starts from the instance's promotion score (depending on its
 * next role), then promoted-role location constraints and the instance's
 * "this with" colocations are applied. The sort index is set to the resulting
 * priority, or to INFINITY if the next role is already promoted. Nothing
 * beyond the role fix-up is done for an instance that is not assigned to a
 * node. Intended for use as a g_list_foreach() callback.
 *
 * \param[in,out] data       Promotable clone instance to update
 * \param[in]     user_data  Clone that the instance belongs to
 *
 * NOTE(review): this listing skips embedded line 951, directly before the
 * "case pcmk_role_stopped:" label. It may hold another case label sharing
 * that branch. Confirm against the original file before touching the switch.
 */
902 static void
903 set_instance_priority(gpointer data, gpointer user_data)
904 {
905  pcmk_resource_t *instance = (pcmk_resource_t *) data;
906  const pcmk_resource_t *clone = (const pcmk_resource_t *) user_data;
907  const pcmk_node_t *chosen = NULL;
908  enum rsc_role_e next_role = pcmk_role_unknown;
909  GList *list = NULL;
910 
911  pcmk__rsc_trace(clone, "Assigning priority for %s: %s", instance->id,
912  pcmk_role_text(instance->next_role));
913 
914  if (instance->fns->state(instance, TRUE) == pcmk_role_started) {
915  set_current_role_unpromoted(instance, NULL);
916  }
917 
918  // Only an instance that will be active can be promoted
919  chosen = instance->fns->location(instance, &list, FALSE);
920  if (pcmk__list_of_multiple(list)) {
921  pcmk__config_err("Cannot promote non-colocated child %s",
922  instance->id);
923  }
924  g_list_free(list);
925  if (chosen == NULL) {
926  return;
927  }
928 
929  next_role = instance->fns->state(instance, FALSE);
930  switch (next_role) {
931  case pcmk_role_started:
932  case pcmk_role_unknown:
933  // Set instance priority to its promotion score (or -1 if none)
934  {
935  bool is_default = false;
936 
937  instance->priority = promotion_score(instance, chosen,
938  &is_default);
939  if (is_default) {
940  /* Default to -1 if no value is set. This allows instances
941  * eligible for promotion to be specified based solely on
942  * PCMK_XE_RSC_LOCATION constraints, but prevents any
943  * instance from being promoted if neither a constraint nor
944  * a promotion score is present.
945  */
946  instance->priority = -1;
947  }
948  }
949  break;
950 
952  case pcmk_role_stopped:
953  // Instance can't be promoted
954  instance->priority = -PCMK_SCORE_INFINITY;
955  break;
956 
957  case pcmk_role_promoted:
958  // Nothing needed (re-creating actions after scheduling fencing)
959  break;
960 
961  default:
962  CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s",
963  next_role, instance->id));
964  }
965 
966  // Add relevant location constraint scores for promoted role
967  apply_promoted_locations(instance, instance->rsc_location, chosen);
968  apply_promoted_locations(instance, clone->rsc_location, chosen);
969 
970  // Consider instance's role-based colocations with other resources
971  list = pcmk__this_with_colocations(instance);
972  for (GList *iter = list; iter != NULL; iter = iter->next) {
973  pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data;
974 
975  instance->cmds->apply_coloc_score(instance, cons->primary, cons, true);
976  }
977  g_list_free(list);
978 
979  instance->sort_index = instance->priority;
980  if (next_role == pcmk_role_promoted) {
981  instance->sort_index = PCMK_SCORE_INFINITY;
982  }
983  pcmk__rsc_trace(clone, "Assigning %s priority = %d",
984  instance->id, instance->priority);
985 }
986 
994 static void
995 set_instance_role(gpointer data, gpointer user_data)
996 {
997  pcmk_resource_t *instance = (pcmk_resource_t *) data;
998  int *count = (int *) user_data;
999 
1000  const pcmk_resource_t *clone = pe__const_top_resource(instance, false);
1001  pcmk_node_t *chosen = NULL;
1002 
1003  show_promotion_score(instance);
1004 
1005  if (instance->sort_index < 0) {
1006  pcmk__rsc_trace(clone, "Not supposed to promote instance %s",
1007  instance->id);
1008 
1009  } else if ((*count < pe__clone_promoted_max(instance))
1010  || !pcmk_is_set(clone->flags, pcmk_rsc_managed)) {
1011  chosen = node_to_be_promoted_on(instance);
1012  }
1013 
1014  if (chosen == NULL) {
1015  set_next_role_unpromoted(instance, NULL);
1016  return;
1017  }
1018 
1019  if ((instance->role < pcmk_role_promoted)
1020  && !pcmk_is_set(instance->cluster->flags, pcmk_sched_quorate)
1021  && (instance->cluster->no_quorum_policy == pcmk_no_quorum_freeze)) {
1022  crm_notice("Clone instance %s cannot be promoted without quorum",
1023  instance->id);
1024  set_next_role_unpromoted(instance, NULL);
1025  return;
1026  }
1027 
1028  chosen->count++;
1029  pcmk__rsc_info(clone, "Choosing %s (%s) on %s for promotion",
1030  instance->id, pcmk_role_text(instance->role),
1031  pcmk__node_name(chosen));
1032  set_next_role_promoted(instance, NULL);
1033  (*count)++;
1034 }
1035 
/*!
 * \internal
 * \brief Choose which instances of a promotable clone will be promoted
 *
 * Resets the per-node counters of the clone's allowed nodes, sets and sorts
 * the instances' promotion priorities, then assigns next roles in sorted
 * order.
 *
 * NOTE(review): this listing skips embedded line 1043, so the declarator
 * (function name and parameter list) is missing below. The body refers to the
 * parameter as \c rsc. Confirm against the original file.
 */
1042 void
1044 {
1045  int promoted = 0;
1046  GHashTableIter iter;
1047  pcmk_node_t *node = NULL;
1048 
1049  // Repurpose count to track the number of promoted instances assigned
1050  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
1051  while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
1052  node->count = 0;
1053  }
1054 
1055  // Set instances' promotion priorities and sort by highest priority first
1056  g_list_foreach(rsc->children, set_instance_priority, rsc);
1057  sort_promotable_instances(rsc);
1058 
1059  // Choose the first N eligible instances to be promoted
1060  g_list_foreach(rsc->children, set_instance_role, &promoted);
1061  pcmk__rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
1062  rsc->id, promoted, pe__clone_promoted_max(rsc));
1063 }
1064 
1074 static void
1075 create_promotable_instance_actions(pcmk_resource_t *clone,
1076  bool *any_promoting, bool *any_demoting)
1077 {
1078  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
1079  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1080 
1081  instance->cmds->create_actions(instance);
1082  check_for_role_change(instance, any_demoting, any_promoting);
1083  }
1084 }
1085 
1096 static void
1097 reset_instance_priorities(pcmk_resource_t *clone)
1098 {
1099  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
1100  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1101 
1102  instance->priority = clone->priority;
1103  }
1104 }
1105 
/*!
 * \internal
 * \brief Create the actions for a promotable clone and its instances
 *
 * Creates each instance's actions, then the clone-wide pseudo-actions, and
 * finally restores the instances' priorities to the clone's priority.
 *
 * NOTE(review): this listing skips embedded line 1113, so the declarator
 * (function name and parameter list) is missing below. The body refers to the
 * parameter as \c clone. Confirm against the original file.
 */
1112 void
1114 {
1115  bool any_promoting = false;
1116  bool any_demoting = false;
1117 
1118  // Create actions for each clone instance individually
1119  create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
1120 
1121  // Create pseudo-actions for clone as a whole
1122  pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
1123 
1124  // Undo our temporary repurposing of resource priority for instances
1125  reset_instance_priorities(clone);
1126 }
1127 
/*!
 * \internal
 * \brief Create the internal orderings for a promotable clone's instances
 *
 * Each instance's promotion and demotion are ordered relative to the clone
 * and, through \c previous, relative to the instance handled before it.
 *
 * NOTE(review): this listing skips embedded lines 1135, 1139, 1145 and 1147.
 * The declarator, a statement before the loop, and most of the
 * "demote before promote" ordering call are therefore missing below. The body
 * refers to the parameter as \c clone. Confirm against the original file.
 */
1134 void
1136 {
1137  pcmk_resource_t *previous = NULL; // Needed for ordered clones
1138 
1140 
1141  for (GList *iter = clone->children; iter != NULL; iter = iter->next) {
1142  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1143 
1144  // Demote before promote
1146  instance, PCMK_ACTION_PROMOTE,
1148 
1149  order_instance_promotion(clone, instance, previous);
1150  order_instance_demotion(clone, instance, previous);
1151  previous = instance;
1152  }
1153 }
1154 
1164 static void
1165 update_dependent_allowed_nodes(pcmk_resource_t *dependent,
1166  const pcmk_resource_t *primary,
1167  const pcmk_node_t *primary_node,
1168  const pcmk__colocation_t *colocation)
1169 {
1170  GHashTableIter iter;
1171  pcmk_node_t *node = NULL;
1172  const char *primary_value = NULL;
1173  const char *attr = colocation->node_attribute;
1174 
1175  if (colocation->score >= PCMK_SCORE_INFINITY) {
1176  return; // Colocation is mandatory, so allowed node scores don't matter
1177  }
1178 
1179  primary_value = pcmk__colocation_node_attr(primary_node, attr, primary);
1180 
1181  pcmk__rsc_trace(colocation->primary,
1182  "Applying %s (%s with %s on %s by %s @%d) to %s",
1183  colocation->id, colocation->dependent->id,
1184  colocation->primary->id, pcmk__node_name(primary_node),
1185  attr, colocation->score, dependent->id);
1186 
1187  g_hash_table_iter_init(&iter, dependent->allowed_nodes);
1188  while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
1189  const char *dependent_value = pcmk__colocation_node_attr(node, attr,
1190  dependent);
1191 
1192  if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
1193  node->weight = pcmk__add_scores(node->weight, colocation->score);
1194  pcmk__rsc_trace(colocation->primary,
1195  "Added %s score (%s) to %s (now %s)",
1196  colocation->id,
1197  pcmk_readable_score(colocation->score),
1198  pcmk__node_name(node),
1199  pcmk_readable_score(node->weight));
1200  }
1201  }
1202 }
1203 
/*!
 * \internal
 * \brief Update a dependent's allowed nodes for a colocation with a
 *        promotable clone
 *
 * \param[in,out] dependent   Dependent resource to update
 * \param[in]     colocation  Colocation constraint to apply
 *
 * NOTE(review): this listing skips embedded line 1212, so the function name
 * and its first parameter are missing below. The body uses that parameter as
 * \c primary. Confirm against the original file.
 */
1211 void
1213  pcmk_resource_t *dependent,
1214  const pcmk__colocation_t *colocation)
1215 {
1216  GList *affected_nodes = NULL;
1217 
1218  /* Build a list of all nodes where an instance of the primary will be, and
1219  * (for optional colocations) update the dependent's allowed node scores for
1220  * each one.
1221  */
1222  for (GList *iter = primary->children; iter != NULL; iter = iter->next) {
1223  pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1224  pcmk_node_t *node = instance->fns->location(instance, NULL, FALSE);
1225 
1226  if (node == NULL) {
1227  continue;
1228  }
    // Only instances whose next role matches the colocation's primary role
1229  if (instance->fns->state(instance, FALSE) == colocation->primary_role) {
1230  update_dependent_allowed_nodes(dependent, primary, node,
1231  colocation);
1232  affected_nodes = g_list_prepend(affected_nodes, node);
1233  }
1234  }
1235 
1236  /* For mandatory colocations, add the primary's node score to the
1237  * dependent's node score for each affected node, and ban the dependent
1238  * from all other nodes.
1239  *
1240  * However, skip this for promoted-with-promoted colocations, otherwise
1241  * inactive dependent instances can't start (in the unpromoted role).
1242  */
1243  if ((colocation->score >= PCMK_SCORE_INFINITY)
1244  && ((colocation->dependent_role != pcmk_role_promoted)
1245  || (colocation->primary_role != pcmk_role_promoted))) {
1246 
1247  pcmk__rsc_trace(colocation->primary,
1248  "Applying %s (mandatory %s with %s) to %s",
1249  colocation->id, colocation->dependent->id,
1250  colocation->primary->id, dependent->id);
1251  pcmk__colocation_intersect_nodes(dependent, primary, colocation,
1252  affected_nodes, true);
1253  }
    // Only the list cells are freed; the nodes themselves are not
1254  g_list_free(affected_nodes);
1255 }
1256 
/*!
 * \internal
 * \brief Update a dependent's priority for a colocation with a promotable
 *        clone
 *
 * \param[in,out] dependent   Dependent resource whose priority is updated
 * \param[in]     colocation  Colocation constraint to apply
 *
 * \return The colocation score if a compatible primary instance was found,
 *         -INFINITY if none was found and the colocation is mandatory,
 *         otherwise 0
 *
 * NOTE(review): this listing skips embedded line 1268, so the function name
 * and its first parameter are missing below. The body uses that parameter as
 * \c primary. Confirm against the original file.
 */
1267 int
1269  pcmk_resource_t *dependent,
1270  const pcmk__colocation_t *colocation)
1271 {
1272  pcmk_resource_t *primary_instance = NULL;
1273 
1274  // Look for a primary instance where dependent will be
1275  primary_instance = pcmk__find_compatible_instance(dependent, primary,
1276  colocation->primary_role,
1277  false);
1278 
1279  if (primary_instance != NULL) {
1280  // Add primary instance's priority to dependent's
    // NOTE(review): the value added below is the colocation score
1281  int new_priority = pcmk__add_scores(dependent->priority,
1282  colocation->score);
1283 
1284  pcmk__rsc_trace(colocation->primary,
1285  "Applying %s (%s with %s) to %s priority "
1286  "(%s + %s = %s)",
1287  colocation->id, colocation->dependent->id,
1288  colocation->primary->id, dependent->id,
1289  pcmk_readable_score(dependent->priority),
1290  pcmk_readable_score(colocation->score),
1291  pcmk_readable_score(new_priority));
1292  dependent->priority = new_priority;
1293  return colocation->score;
1294  }
1295 
1296  if (colocation->score >= PCMK_SCORE_INFINITY) {
1297  // Mandatory colocation, but primary won't be here
1298  pcmk__rsc_trace(colocation->primary,
1299  "Applying %s (%s with %s) to %s: can't be promoted",
1300  colocation->id, colocation->dependent->id,
1301  colocation->primary->id, dependent->id);
1302  dependent->priority = -PCMK_SCORE_INFINITY;
1303  return -PCMK_SCORE_INFINITY;
1304  }
1305  return 0;
1306 }
pcmk_assignment_methods_t * cmds
Definition: resources.h:413
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:1071
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:245
enum pe_quorum_policy no_quorum_policy
Definition: scheduler.h:217
#define crm_notice(fmt, args...)
Definition: logging.h:397
GHashTable * known_on
Definition: resources.h:459
pcmk_scheduler_t * cluster
Definition: resources.h:408
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition: roles.c:23
node_type
Definition: nodes.h:39
char data[0]
Definition: cpg.c:58
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
Definition: resources.h:328
void(* create_actions)(pcmk_resource_t *rsc)
Stopped.
Definition: roles.h:36
const char * name
Definition: cib.c:26
int(* message)(pcmk__output_t *out, const char *message_id,...)
const char * pcmk_readable_score(int score)
Return a displayable static string for a score value.
Definition: scores.c:137
pcmk_resource_t rsc2
enum rsc_role_e role
Definition: resources.h:464
G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pcmk_resource_t *rsc)
GList * children
Definition: resources.h:471
#define pcmk__rsc_trace(rsc, fmt, args...)
#define PCMK_META_CONTAINER_ATTRIBUTE_TARGET
Definition: options.h:85
int count
Definition: nodes.h:165
Match only clones and their instances, by either clone or instance ID.
Definition: resources.h:191
#define pcmk__rsc_info(rsc, fmt, args...)
enum rsc_role_e(* state)(const pcmk_resource_t *rsc, gboolean current)
Definition: resources.h:316
enum rsc_role_e next_role
Definition: resources.h:465
#define pcmk__config_err(fmt...)
GHashTable * meta
Definition: resources.h:467
G_GNUC_INTERNAL pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current)
void(* add_colocated_node_scores)(pcmk_resource_t *source_rsc, const pcmk_resource_t *target_rsc, const char *log_id, GHashTable **nodes, const pcmk__colocation_t *colocation, float factor, uint32_t flags)
Promoted.
Definition: roles.h:39
char * pcmk_promotion_score_name(const char *rsc_id)
Return the name of the node attribute used as a promotion score.
Definition: attrs.c:92
#define CRM_LOG_ASSERT(expr)
Definition: logging.h:228
Where resource is assigned.
int pe__set_clone_flag(pcmk_resource_t *clone, enum pcmk__clone_flags flag)
Definition: clone.c:1363
G_GNUC_INTERNAL bool pcmk__node_available(const pcmk_node_t *node, bool consider_score, bool consider_guest)
const char * action
Definition: pcmk_fence.c:30
#define pcmk__rsc_debug(rsc, fmt, args...)
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, int flags)
Definition: resources.h:276
#define PCMK_ACTION_DEMOTE
Definition: actions.h:49
int weight
Definition: nodes.h:163
#define crm_warn(fmt, args...)
Definition: logging.h:394
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition: complex.c:1292
void pcmk__order_promotable_instances(pcmk_resource_t *clone)
Actions are ordered (optionally, if no other flags are set)
pcmk_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition: status.c:492
char * clone_strip(const char *last_rsc_id)
Definition: unpack.c:1988
#define pcmk__sched_err(fmt...)
int pcmk__update_promotable_dependent_priority(const pcmk_resource_t *primary, pcmk_resource_t *dependent, const pcmk__colocation_t *colocation)
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:94
void pcmk__create_promotable_actions(pcmk_resource_t *clone)
struct pe_node_shared_s * details
Definition: nodes.h:168
void pe__create_promotable_pseudo_ops(pcmk_resource_t *clone, bool any_promoting, bool any_demoting)
Definition: clone.c:1406
G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b)
bool pcmk__is_daemon
Definition: logging.c:47
unsigned long long flags
Definition: resources.h:428
Unpromoted.
Definition: roles.h:38
Wrappers for and extensions to libxml2.
rsc_role_e
Definition: roles.h:34
char * clone_name
Definition: resources.h:397
GList * actions
Definition: resources.h:444
#define pcmk__set_rsc_flags(resource, flags_to_set)
int pcmk__add_scores(int score1, int score2)
Definition: scores.c:167
void pcmk__set_instance_roles(pcmk_resource_t *rsc)
int pe__clone_promoted_node_max(const pcmk_resource_t *clone)
Definition: clone.c:114
int pe__clone_promoted_max(const pcmk_resource_t *clone)
Definition: clone.c:97
pcmk_resource_t rsc1
const char * id
Definition: nodes.h:73
#define pcmk__order_resource_actions(first_rsc, first_task, then_rsc, then_task, flags)
pcmk_resource_t * primary
#define pcmk__assert(expr)
const char * target
Definition: pcmk_fence.c:29
If 'then' is required, 'first' must be added to the transition graph.
G_GNUC_INTERNAL GList * pcmk__with_this_colocations(const pcmk_resource_t *rsc)
pcmk_rsc_methods_t * fns
Definition: resources.h:412
pcmk__rsc_node
#define crm_err(fmt, args...)
Definition: logging.h:391
If matching by node, compare current node instead of assigned node.
Definition: resources.h:194
If 'first' is required and runnable, 'then' must be in graph.
G_GNUC_INTERNAL GList * pcmk__this_with_colocations(const pcmk_resource_t *rsc)
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:118
Started.
Definition: roles.h:37
This structure contains everything that makes up a single output formatter.
GList * rsc_location
Definition: resources.h:443
enum rsc_role_e role_filter
void pcmk__add_promotion_scores(pcmk_resource_t *rsc)
G_GNUC_INTERNAL void pcmk__promotable_restart_ordering(pcmk_resource_t *rsc)
#define PCMK_ACTION_PROMOTE
Definition: actions.h:66
int pcmk_parse_score(const char *score_s, int *score, int default_score)
Parse an integer score from a string.
Definition: scores.c:39
pcmk_resource_t * dependent
#define pe__show_node_scores(level, rsc, text, nodes, scheduler)
Definition: internal.h:176
GList * running_on
Definition: resources.h:456
const char * node_attribute
#define PCMK_ACTION_PROMOTED
Definition: actions.h:67
Resource role is unknown.
Definition: roles.h:35
Location constraint object.
bool pe__clone_is_ordered(const pcmk_resource_t *clone)
Definition: clone.c:1344
unsigned long long flags
Definition: scheduler.h:211
const char * parent
Definition: cib.c:27
#define PCMK_ACTION_DEMOTED
Definition: actions.h:50
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
int(* apply_coloc_score)(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent)
G_GNUC_INTERNAL void pcmk__colocation_intersect_nodes(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, const GList *primary_nodes, bool merge_scores)
uint64_t flags
Definition: remote.c:215
G_GNUC_INTERNAL bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pcmk_resource_t *rsc)
void pcmk__update_dependent_with_promotable(const pcmk_resource_t *primary, pcmk_resource_t *dependent, const pcmk__colocation_t *colocation)
Update dependent for a colocation with a promotable clone.
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
Definition: scores.h:24
GHashTable * allowed_nodes
Definition: resources.h:462
Where resource is running.