pacemaker 3.0.1-16e74fc4da
Scalable High-Availability cluster resource manager
Loading...
Searching...
No Matches
pcmk_sched_promotable.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2025 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#include <stdbool.h> // bool, true, false
13
14#include <crm/common/xml.h>
15#include <pacemaker-internal.h>
16
18
27static void
28order_instance_promotion(pcmk_resource_t *clone, pcmk_resource_t *child,
29 pcmk_resource_t *last)
30{
31 // "Promote clone" -> promote instance -> "clone promoted"
38
39 // If clone is ordered, order this instance relative to last
40 if ((last != NULL) && pe__clone_is_ordered(clone)) {
44 }
45}
46
55static void
56order_instance_demotion(pcmk_resource_t *clone, pcmk_resource_t *child,
57 pcmk_resource_t *last)
58{
59 // "Demote clone" -> demote instance -> "clone demoted"
66
67 // If clone is ordered, order this instance relative to last
68 if ((last != NULL) && pe__clone_is_ordered(clone)) {
71 }
72}
73
82static void
83check_for_role_change(const pcmk_resource_t *rsc, bool *demoting,
84 bool *promoting)
85{
86 const GList *iter = NULL;
87
88 // If this is a cloned group, check group members recursively
89 if (rsc->priv->children != NULL) {
90 for (iter = rsc->priv->children; iter != NULL; iter = iter->next) {
91 check_for_role_change((const pcmk_resource_t *) iter->data,
92 demoting, promoting);
93 }
94 return;
95 }
96
97 for (iter = rsc->priv->actions; iter != NULL; iter = iter->next) {
98 const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
99
100 if (*promoting && *demoting) {
101 return;
102
103 } else if (pcmk_is_set(action->flags, pcmk__action_optional)) {
104 continue;
105
106 } else if (pcmk__str_eq(PCMK_ACTION_DEMOTE, action->task,
108 *demoting = true;
109
110 } else if (pcmk__str_eq(PCMK_ACTION_PROMOTE, action->task,
112 *promoting = true;
113 }
114 }
115}
116
129static void
130apply_promoted_locations(pcmk_resource_t *child,
131 const GList *location_constraints,
132 const pcmk_node_t *chosen)
133{
134 for (const GList *iter = location_constraints; iter; iter = iter->next) {
135 const pcmk__location_t *location = iter->data;
136 const pcmk_node_t *constraint_node = NULL;
137
138 if (location->role_filter == pcmk_role_promoted) {
139 constraint_node = pe_find_node_id(location->nodes,
140 chosen->priv->id);
141 }
142 if (constraint_node != NULL) {
143 int new_priority = pcmk__add_scores(child->priv->priority,
144 constraint_node->assign->score);
145
146 pcmk__rsc_trace(child,
147 "Applying location %s to %s promotion priority on "
148 "%s: %s + %s = %s",
149 location->id, child->id,
150 pcmk__node_name(constraint_node),
152 pcmk_readable_score(constraint_node->assign->score),
153 pcmk_readable_score(new_priority));
154 child->priv->priority = new_priority;
155 }
156 }
157}
158
167static pcmk_node_t *
168node_to_be_promoted_on(const pcmk_resource_t *rsc)
169{
170 pcmk_node_t *node = NULL;
171 pcmk_node_t *local_node = NULL;
172 const pcmk_resource_t *parent = NULL;
173
174 // If this is a cloned group, bail if any group member can't be promoted
175 for (GList *iter = rsc->priv->children;
176 iter != NULL; iter = iter->next) {
177
178 pcmk_resource_t *child = (pcmk_resource_t *) iter->data;
179
180 if (node_to_be_promoted_on(child) == NULL) {
181 pcmk__rsc_trace(rsc,
182 "%s can't be promoted because member %s can't",
183 rsc->id, child->id);
184 return NULL;
185 }
186 }
187
188 node = rsc->priv->fns->location(rsc, NULL, pcmk__rsc_node_assigned);
189 if (node == NULL) {
190 pcmk__rsc_trace(rsc, "%s can't be promoted because it won't be active",
191 rsc->id);
192 return NULL;
193
194 } else if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
195 if (rsc->priv->fns->state(rsc, true) == pcmk_role_promoted) {
196 crm_notice("Unmanaged instance %s will be left promoted on %s",
197 rsc->id, pcmk__node_name(node));
198 } else {
199 pcmk__rsc_trace(rsc, "%s can't be promoted because it is unmanaged",
200 rsc->id);
201 return NULL;
202 }
203
204 } else if (rsc->priv->priority < 0) {
205 pcmk__rsc_trace(rsc,
206 "%s can't be promoted because its promotion priority "
207 "%d is negative",
208 rsc->id, rsc->priv->priority);
209 return NULL;
210
211 } else if (!pcmk__node_available(node, false, true)) {
212 pcmk__rsc_trace(rsc,
213 "%s can't be promoted because %s can't run resources",
214 rsc->id, pcmk__node_name(node));
215 return NULL;
216 }
217
218 parent = pe__const_top_resource(rsc, false);
219 local_node = g_hash_table_lookup(parent->priv->allowed_nodes,
220 node->priv->id);
221
222 if (local_node == NULL) {
223 /* It should not be possible for the scheduler to have assigned the
224 * instance to a node where its parent is not allowed, but it's good to
225 * have a fail-safe.
226 */
229 "%s can't be promoted because %s is not allowed "
230 "on %s (scheduler bug?)",
231 rsc->id, parent->id, pcmk__node_name(node));
232 } // else the instance is unmanaged and already promoted
233 return NULL;
234
235 } else if ((local_node->assign->count >= pe__clone_promoted_node_max(parent))
237 pcmk__rsc_trace(rsc,
238 "%s can't be promoted because %s has "
239 "maximum promoted instances already",
240 rsc->id, pcmk__node_name(node));
241 return NULL;
242 }
243
244 return local_node;
245}
246
258static gint
259cmp_promotable_instance(gconstpointer a, gconstpointer b)
260{
261 const pcmk_resource_t *rsc1 = (const pcmk_resource_t *) a;
262 const pcmk_resource_t *rsc2 = (const pcmk_resource_t *) b;
263
264 enum rsc_role_e role1 = pcmk_role_unknown;
265 enum rsc_role_e role2 = pcmk_role_unknown;
266
267 pcmk__assert((rsc1 != NULL) && (rsc2 != NULL));
268
269 // Check promotion priority set by pcmk__set_instance_roles()
270 if (rsc1->priv->promotion_priority > rsc2->priv->promotion_priority) {
271 pcmk__rsc_trace(rsc1,
272 "%s has higher promotion priority (%s) than %s (%d)",
273 rsc1->id,
275 rsc2->id, rsc2->priv->promotion_priority);
276 return -1;
277 }
278
279 if (rsc1->priv->promotion_priority < rsc2->priv->promotion_priority) {
280 pcmk__rsc_trace(rsc1,
281 "%s has lower promotion priority (%s) than %s (%d)",
282 rsc1->id,
284 rsc2->id, rsc2->priv->promotion_priority);
285 return 1;
286 }
287
288 // If those are the same, prefer instance whose current role is higher
289 role1 = rsc1->priv->fns->state(rsc1, true);
290 role2 = rsc2->priv->fns->state(rsc2, true);
291 if (role1 > role2) {
292 pcmk__rsc_trace(rsc1,
293 "%s has higher promotion priority than %s "
294 "(higher current role)",
295 rsc1->id, rsc2->id);
296 return -1;
297 } else if (role1 < role2) {
298 pcmk__rsc_trace(rsc1,
299 "%s has lower promotion priority than %s "
300 "(lower current role)",
301 rsc1->id, rsc2->id);
302 return 1;
303 }
304
305 // Finally, do normal clone instance sorting
306 return pcmk__cmp_instance(a, b);
307}
308
320static void
321add_promotion_priority_to_node_score(gpointer data, gpointer user_data)
322{
323 const pcmk_resource_t *child = (const pcmk_resource_t *) data;
324 pcmk_resource_t *clone = (pcmk_resource_t *) user_data;
325
326 pcmk_node_t *node = NULL;
327 const pcmk_node_t *chosen = NULL;
328 const int promotion_priority = child->priv->promotion_priority;
329
330 if (promotion_priority < 0) {
331 pcmk__rsc_trace(clone,
332 "Not adding promotion priority of %s: negative (%s)",
333 child->id, pcmk_readable_score(promotion_priority));
334 return;
335 }
336
337 chosen = child->priv->fns->location(child, NULL, pcmk__rsc_node_assigned);
338 if (chosen == NULL) {
339 pcmk__rsc_trace(clone, "Not adding promotion priority of %s: inactive",
340 child->id);
341 return;
342 }
343
344 node = g_hash_table_lookup(clone->priv->allowed_nodes,
345 chosen->priv->id);
346 pcmk__assert(node != NULL);
347
348 node->assign->score = pcmk__add_scores(promotion_priority,
349 node->assign->score);
350 pcmk__rsc_trace(clone,
351 "Added cumulative priority of %s (%s) to score on %s "
352 "(now %d)",
353 child->id, pcmk_readable_score(promotion_priority),
354 pcmk__node_name(node), node->assign->score);
355}
356
364static void
365apply_coloc_to_primary(gpointer data, gpointer user_data)
366{
367 pcmk__colocation_t *colocation = data;
368 pcmk_resource_t *clone = user_data;
369 pcmk_resource_t *dependent = colocation->dependent;
370 const float factor = colocation->score / (float) PCMK_SCORE_INFINITY;
371 const uint32_t flags = pcmk__coloc_select_active
373
374 if ((colocation->primary_role != pcmk_role_promoted)
375 || !pcmk__colocation_has_influence(colocation, NULL)) {
376 return;
377 }
378
379 pcmk__rsc_trace(clone, "Applying colocation %s (%s with promoted %s) @%s",
380 colocation->id, colocation->dependent->id,
381 colocation->primary->id,
382 pcmk_readable_score(colocation->score));
383 dependent->priv->cmds->add_colocated_node_scores(dependent, clone,
384 clone->id,
385 &(clone->priv->allowed_nodes),
386 colocation, factor, flags);
387}
388
396static void
397set_promotion_priority_to_node_score(gpointer data, gpointer user_data)
398{
400 const pcmk_resource_t *clone = (const pcmk_resource_t *) user_data;
401
402 pcmk_node_t *chosen = child->priv->fns->location(child, NULL,
404
406 && (child->priv->next_role == pcmk_role_promoted)) {
408 pcmk__rsc_trace(clone,
409 "Final promotion priority for %s is %s "
410 "(unmanaged promoted)",
412
413 } else if (chosen == NULL) {
415 pcmk__rsc_trace(clone,
416 "Final promotion priority for %s is %s "
417 "(will not be active)",
419
420 } else if (child->priv->promotion_priority < 0) {
421 pcmk__rsc_trace(clone,
422 "Final promotion priority for %s is %s "
423 "(ignoring node score)",
424 child->id,
426
427 } else {
428 const pcmk_node_t *node = NULL;
429
430 node = g_hash_table_lookup(clone->priv->allowed_nodes,
431 chosen->priv->id);
432
433 pcmk__assert(node != NULL);
434 child->priv->promotion_priority = node->assign->score;
435 pcmk__rsc_trace(clone,
436 "Adding scores for %s: "
437 "final promotion priority for %s is %s",
438 clone->id, child->id,
440 }
441}
442
449static void
450sort_promotable_instances(pcmk_resource_t *clone)
451{
452 GList *colocations = NULL;
453
455 == pcmk_rc_already) {
456 return;
457 }
459
460 for (GList *iter = clone->priv->children;
461 iter != NULL; iter = iter->next) {
462
463 pcmk_resource_t *child = (pcmk_resource_t *) iter->data;
464
465 pcmk__rsc_trace(clone,
466 "Adding scores for %s: "
467 "initial promotion priority for %s is %s",
468 clone->id, child->id,
470 }
471 pe__show_node_scores(true, clone, "Before", clone->priv->allowed_nodes,
472 clone->priv->scheduler);
473
474 g_list_foreach(clone->priv->children,
475 add_promotion_priority_to_node_score, clone);
476
477 // "this with" colocations were already applied via set_instance_priority()
478 colocations = pcmk__with_this_colocations(clone);
479 g_list_foreach(colocations, apply_coloc_to_primary, clone);
480 g_list_free(colocations);
481
482 // Ban resource from all nodes if it needs a ticket but doesn't have it
484
485 pe__show_node_scores(true, clone, "After", clone->priv->allowed_nodes,
486 clone->priv->scheduler);
487
488 // Reset promotion priorities to final node scores
489 g_list_foreach(clone->priv->children,
490 set_promotion_priority_to_node_score, clone);
491
492 // Finally, sort instances in descending order of promotion priority
493 clone->priv->children = g_list_sort(clone->priv->children,
494 cmp_promotable_instance);
496}
497
508static pcmk_resource_t *
509find_active_anon_instance(const pcmk_resource_t *clone, const char *id,
510 const pcmk_node_t *node)
511{
512 for (GList *iter = clone->priv->children; iter; iter = iter->next) {
513 pcmk_resource_t *child = iter->data;
514 pcmk_resource_t *active = NULL;
515
516 // Use ->find_rsc() in case this is a cloned group
517 active = clone->priv->fns->find_rsc(child, id, node,
520 if (active != NULL) {
521 return active;
522 }
523 }
524 return NULL;
525}
526
/*!
 * \internal
 * \brief Check whether an anonymous clone instance is known on a node
 *
 * \param[in] clone  Anonymous clone to check
 * \param[in] id     Instance ID (without instance number) to check
 * \param[in] node   Node to check
 *
 * \return true if \p id instance of \p clone is known on \p node,
 *         otherwise false
 */
537static bool
538anonymous_known_on(const pcmk_resource_t *clone, const char *id,
539 const pcmk_node_t *node)
540{
541 for (GList *iter = clone->priv->children; iter; iter = iter->next) {
542 pcmk_resource_t *child = iter->data;
543
544 /* Use ->find_rsc() because this might be a cloned group, and knowing
545 * that other members of the group are known here implies nothing.
546 */
547 child = clone->priv->fns->find_rsc(child, id, NULL,
549 CRM_LOG_ASSERT(child != NULL);
550 if (child != NULL) {
551 if (g_hash_table_lookup(child->priv->probed_nodes,
552 node->priv->id)) {
553 return true;
554 }
555 }
556 }
557 return false;
558}
559
569static bool
570is_allowed(const pcmk_resource_t *rsc, const pcmk_node_t *node)
571{
572 pcmk_node_t *allowed = g_hash_table_lookup(rsc->priv->allowed_nodes,
573 node->priv->id);
574
575 return (allowed != NULL) && (allowed->assign->score >= 0);
576}
577
587static bool
588promotion_score_applies(const pcmk_resource_t *rsc, const pcmk_node_t *node)
589{
590 char *id = clone_strip(rsc->id);
591 const pcmk_resource_t *parent = pe__const_top_resource(rsc, false);
592 pcmk_resource_t *active = NULL;
593 const char *reason = "allowed";
594
595 // Some checks apply only to anonymous clone instances
596 if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
597
598 // If instance is active on the node, its score definitely applies
599 active = find_active_anon_instance(parent, id, node);
600 if (active == rsc) {
601 reason = "active";
602 goto check_allowed;
603 }
604
605 /* If *no* instance is active on this node, this instance's score will
606 * count if it has been probed on this node.
607 */
608 if ((active == NULL) && anonymous_known_on(parent, id, node)) {
609 reason = "probed";
610 goto check_allowed;
611 }
612 }
613
614 /* If this clone's status is unknown on *all* nodes (e.g. cluster startup),
615 * take all instances' scores into account, to make sure we use any
616 * permanent promotion scores.
617 */
618 if ((rsc->priv->active_nodes == NULL)
619 && (g_hash_table_size(rsc->priv->probed_nodes) == 0)) {
620 reason = "none probed";
621 goto check_allowed;
622 }
623
624 /* Otherwise, we've probed and/or started the resource *somewhere*, so
625 * consider promotion scores on nodes where we know the status.
626 */
627 if ((g_hash_table_lookup(rsc->priv->probed_nodes,
628 node->priv->id) != NULL)
630 node->priv->id) != NULL)) {
631 reason = "known";
632 } else {
633 pcmk__rsc_trace(rsc,
634 "Ignoring %s promotion score (for %s) on %s: "
635 "not probed",
636 rsc->id, id, pcmk__node_name(node));
637 free(id);
638 return false;
639 }
640
641check_allowed:
642 if (is_allowed(rsc, node)) {
643 pcmk__rsc_trace(rsc, "Counting %s promotion score (for %s) on %s: %s",
644 rsc->id, id, pcmk__node_name(node), reason);
645 free(id);
646 return true;
647 }
648
649 pcmk__rsc_trace(rsc,
650 "Ignoring %s promotion score (for %s) on %s: not allowed",
651 rsc->id, id, pcmk__node_name(node));
652 free(id);
653 return false;
654}
655
666static const char *
667promotion_attr_value(const pcmk_resource_t *rsc, const pcmk_node_t *node,
668 const char *name)
669{
670 char *attr_name = NULL;
671 const char *attr_value = NULL;
672 const char *target = NULL;
674
676 // Not assigned yet
677 node_type = pcmk__rsc_node_current;
678 }
679 target = g_hash_table_lookup(rsc->priv->meta,
681 attr_name = pcmk_promotion_score_name(name);
682 attr_value = pcmk__node_attr(node, attr_name, target, node_type);
683 free(attr_name);
684 return attr_value;
685}
686
697static int
698promotion_score(const pcmk_resource_t *rsc, const pcmk_node_t *node,
699 bool *is_default)
700{
701 int score = 0;
702 int rc = pcmk_rc_ok;
703 const char *name = NULL;
704 const char *attr_value = NULL;
705
706 if (is_default != NULL) {
707 *is_default = true;
708 }
709
710 CRM_CHECK((rsc != NULL) && (node != NULL), return 0);
711
712 /* If this is an instance of a cloned group, the promotion score is the sum
713 * of all members' promotion scores.
714 */
715 if (rsc->priv->children != NULL) {
716 int score = 0;
717
718 for (const GList *iter = rsc->priv->children;
719 iter != NULL; iter = iter->next) {
720
721 const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data;
722 bool child_default = false;
723 int child_score = promotion_score(child, node, &child_default);
724
725 if (!child_default && (is_default != NULL)) {
726 *is_default = false;
727 }
728 score += child_score;
729 }
730 return score;
731 }
732
733 if (!promotion_score_applies(rsc, node)) {
734 return 0;
735 }
736
737 /* For the promotion score attribute name, use the name the resource is
738 * known as in resource history, since that's what crm_attribute --promotion
739 * would have used.
740 */
741 name = pcmk__s(rsc->priv->history_id, rsc->id);
742
743 attr_value = promotion_attr_value(rsc, node, name);
744 if (attr_value != NULL) {
745 pcmk__rsc_trace(rsc, "Promotion score for %s on %s = %s",
746 name, pcmk__node_name(node),
747 pcmk__s(attr_value, "(unset)"));
748 } else if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) {
749 /* If we don't have any resource history yet, we won't have history_id.
750 * In that case, for anonymous clones, try the resource name without
751 * any instance number.
752 */
753 char *rsc_name = clone_strip(rsc->id);
754
755 if (strcmp(rsc->id, rsc_name) != 0) {
756 attr_value = promotion_attr_value(rsc, node, rsc_name);
757 pcmk__rsc_trace(rsc, "Promotion score for %s on %s (for %s) = %s",
758 rsc_name, pcmk__node_name(node), rsc->id,
759 pcmk__s(attr_value, "(unset)"));
760 }
761 free(rsc_name);
762 }
763
764 if (attr_value == NULL) {
765 return 0;
766 }
767
768 if (is_default != NULL) {
769 *is_default = false;
770 }
771
772 rc = pcmk_parse_score(attr_value, &score, 0);
773 if (rc != pcmk_rc_ok) {
774 crm_warn("Using 0 as promotion score for %s on %s "
775 "because '%s' is not a valid score",
776 rsc->id, pcmk__node_name(node), attr_value);
777 }
778 return score;
779}
780
787void
789{
790 if (pe__set_clone_flag(rsc,
792 return;
793 }
794
795 for (GList *iter = rsc->priv->children;
796 iter != NULL; iter = iter->next) {
797
798 pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
799
800 GHashTableIter iter;
801 pcmk_node_t *node = NULL;
802 int score, new_score;
803
804 g_hash_table_iter_init(&iter, child_rsc->priv->allowed_nodes);
805 while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
806 if (!pcmk__node_available(node, false, false)) {
807 /* This node will never be promoted, so don't apply the
808 * promotion score, as that may lead to clone shuffling.
809 */
810 continue;
811 }
812
813 score = promotion_score(child_rsc, node, NULL);
814 if (score > 0) {
815 new_score = pcmk__add_scores(node->assign->score, score);
816 if (new_score != node->assign->score) { // Could remain INFINITY
817 node->assign->score = new_score;
818 pcmk__rsc_trace(rsc,
819 "Added %s promotion priority (%s) to score "
820 "on %s (now %s)",
821 child_rsc->id, pcmk_readable_score(score),
822 pcmk__node_name(node),
823 pcmk_readable_score(new_score));
824 }
825 }
826
827 if (score > child_rsc->priv->priority) {
828 pcmk__rsc_trace(rsc,
829 "Updating %s priority to promotion score "
830 "(%d->%d)",
831 child_rsc->id, child_rsc->priv->priority,
832 score);
833 child_rsc->priv->priority = score;
834 }
835 }
836 }
837}
838
846static void
847set_current_role_unpromoted(void *data, void *user_data)
848{
850
851 if (rsc->priv->orig_role == pcmk_role_started) {
852 // Promotable clones should use unpromoted role instead of started
854 }
855 g_list_foreach(rsc->priv->children, set_current_role_unpromoted, NULL);
856}
857
865static void
866set_next_role_unpromoted(void *data, void *user_data)
867{
869 GList *assigned = NULL;
870
871 rsc->priv->fns->location(rsc, &assigned, pcmk__rsc_node_assigned);
872 if (assigned == NULL) {
873 pe__set_next_role(rsc, pcmk_role_stopped, "stopped instance");
874 } else {
875 pe__set_next_role(rsc, pcmk_role_unpromoted, "unpromoted instance");
876 g_list_free(assigned);
877 }
878 g_list_foreach(rsc->priv->children, set_next_role_unpromoted, NULL);
879}
880
888static void
889set_next_role_promoted(void *data, gpointer user_data)
890{
892
893 if (rsc->priv->next_role == pcmk_role_unknown) {
894 pe__set_next_role(rsc, pcmk_role_promoted, "promoted instance");
895 }
896 g_list_foreach(rsc->priv->children, set_next_role_promoted, NULL);
897}
898
905static void
906show_promotion_score(pcmk_resource_t *instance)
907{
908 pcmk_node_t *chosen = NULL;
909 const char *score_s = NULL;
910
911 chosen = instance->priv->fns->location(instance, NULL,
913 score_s = pcmk_readable_score(instance->priv->promotion_priority);
914 if (pcmk_is_set(instance->priv->scheduler->flags,
917 && (instance->priv->scheduler->priv->out != NULL)) {
918
919 pcmk__output_t *out = instance->priv->scheduler->priv->out;
920
921 out->message(out, "promotion-score", instance, chosen, score_s);
922
923 } else if (chosen == NULL) {
925 "%s promotion score (inactive): %s (priority=%d)",
926 instance->id, score_s, instance->priv->priority);
927
928 } else {
930 "%s promotion score on %s: %s (priority=%d)",
931 instance->id, pcmk__node_name(chosen),
932 score_s, instance->priv->priority);
933 }
934}
935
943static void
944set_instance_priority(gpointer data, gpointer user_data)
945{
946 pcmk_resource_t *instance = (pcmk_resource_t *) data;
947 const pcmk_resource_t *clone = (const pcmk_resource_t *) user_data;
948
949 const pcmk_node_t *chosen = NULL;
950 enum rsc_role_e next_role = pcmk_role_unknown;
951 GList *list = NULL;
952
953 pcmk__rsc_trace(clone, "Assigning priority for %s: %s", instance->id,
954 pcmk_role_text(instance->priv->next_role));
955
956 if (instance->priv->fns->state(instance, true) == pcmk_role_started) {
957 set_current_role_unpromoted(instance, NULL);
958 }
959
960 // Only an instance that will be active can be promoted
961 chosen = instance->priv->fns->location(instance, &list,
963 if (pcmk__list_of_multiple(list)) {
964 pcmk__config_err("Cannot promote non-colocated child %s",
965 instance->id);
966 }
967 g_list_free(list);
968 if (chosen == NULL) {
969 return;
970 }
971
972 next_role = instance->priv->fns->state(instance, false);
973 switch (next_role) {
976 // Set instance priority to its promotion score (or -1 if none)
977 {
978 bool is_default = false;
979
980 instance->priv->priority = promotion_score(instance, chosen,
981 &is_default);
982 if (is_default) {
983 /* Default to -1 if no value is set. This allows instances
984 * eligible for promotion to be specified based solely on
985 * PCMK_XE_RSC_LOCATION constraints, but prevents any
986 * instance from being promoted if neither a constraint nor
987 * a promotion score is present.
988 */
989 instance->priv->priority = -1;
990 }
991 }
992 break;
993
996 // Instance can't be promoted
997 instance->priv->priority = -PCMK_SCORE_INFINITY;
998 break;
999
1000 case pcmk_role_promoted:
1001 // Nothing needed (re-creating actions after scheduling fencing)
1002 break;
1003
1004 default:
1005 CRM_CHECK(FALSE, crm_err("Unknown resource role %d for %s",
1006 next_role, instance->id));
1007 }
1008
1009 // Add relevant location constraint scores for promoted role
1010 apply_promoted_locations(instance, instance->priv->location_constraints,
1011 chosen);
1012 apply_promoted_locations(instance, clone->priv->location_constraints,
1013 chosen);
1014
1015 // Consider instance's role-based colocations with other resources
1016 list = pcmk__this_with_colocations(instance);
1017 for (GList *iter = list; iter != NULL; iter = iter->next) {
1018 pcmk__colocation_t *cons = (pcmk__colocation_t *) iter->data;
1019
1020 instance->priv->cmds->apply_coloc_score(instance, cons->primary, cons,
1021 true);
1022 }
1023 g_list_free(list);
1024
1025 instance->priv->promotion_priority = instance->priv->priority;
1026 if (next_role == pcmk_role_promoted) {
1028 }
1029 pcmk__rsc_trace(clone, "Assigning %s priority = %d",
1030 instance->id, instance->priv->priority);
1031}
1032
1040static void
1041set_instance_role(gpointer data, gpointer user_data)
1042{
1043 pcmk_resource_t *instance = (pcmk_resource_t *) data;
1044 int *count = (int *) user_data;
1045
1046 const pcmk_resource_t *clone = pe__const_top_resource(instance, false);
1047 const pcmk_scheduler_t *scheduler = instance->priv->scheduler;
1048 pcmk_node_t *chosen = NULL;
1049
1050 show_promotion_score(instance);
1051
1052 if (instance->priv->promotion_priority < 0) {
1053 pcmk__rsc_trace(clone, "Not supposed to promote instance %s",
1054 instance->id);
1055
1056 } else if ((*count < pe__clone_promoted_max(instance))
1057 || !pcmk_is_set(clone->flags, pcmk__rsc_managed)) {
1058 chosen = node_to_be_promoted_on(instance);
1059 }
1060
1061 if (chosen == NULL) {
1062 set_next_role_unpromoted(instance, NULL);
1063 return;
1064 }
1065
1066 if ((instance->priv->orig_role < pcmk_role_promoted)
1069 crm_notice("Clone instance %s cannot be promoted without quorum",
1070 instance->id);
1071 set_next_role_unpromoted(instance, NULL);
1072 return;
1073 }
1074
1075 chosen->assign->count++;
1076 pcmk__rsc_info(clone, "Choosing %s (%s) on %s for promotion",
1077 instance->id, pcmk_role_text(instance->priv->orig_role),
1078 pcmk__node_name(chosen));
1079 set_next_role_promoted(instance, NULL);
1080 (*count)++;
1081}
1082
1089void
1091{
1092 int promoted = 0;
1093 GHashTableIter iter;
1094 pcmk_node_t *node = NULL;
1095
1096 // Repurpose count to track the number of promoted instances assigned
1097 g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
1098 while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
1099 node->assign->count = 0;
1100 }
1101
1102 // Set instances' promotion priorities and sort by highest priority first
1103 g_list_foreach(rsc->priv->children, set_instance_priority, rsc);
1104 sort_promotable_instances(rsc);
1105
1106 // Choose the first N eligible instances to be promoted
1107 g_list_foreach(rsc->priv->children, set_instance_role, &promoted);
1108 pcmk__rsc_info(rsc, "%s: Promoted %d instances of a possible %d",
1109 rsc->id, promoted, pe__clone_promoted_max(rsc));
1110}
1111
1121static void
1122create_promotable_instance_actions(pcmk_resource_t *clone,
1123 bool *any_promoting, bool *any_demoting)
1124{
1125 for (GList *iter = clone->priv->children;
1126 iter != NULL; iter = iter->next) {
1127
1128 pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1129
1130 instance->priv->cmds->create_actions(instance);
1131 check_for_role_change(instance, any_demoting, any_promoting);
1132 }
1133}
1134
1145static void
1146reset_instance_priorities(pcmk_resource_t *clone)
1147{
1148 for (GList *iter = clone->priv->children;
1149 iter != NULL; iter = iter->next) {
1150
1151 pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1152
1153 instance->priv->priority = clone->priv->priority;
1154 }
1155}
1156
1163void
1165{
1166 bool any_promoting = false;
1167 bool any_demoting = false;
1168
1169 // Create actions for each clone instance individually
1170 create_promotable_instance_actions(clone, &any_promoting, &any_demoting);
1171
1172 // Create pseudo-actions for clone as a whole
1173 pe__create_promotable_pseudo_ops(clone, any_promoting, any_demoting);
1174
1175 // Undo our temporary repurposing of resource priority for instances
1176 reset_instance_priorities(clone);
1177}
1178
1185void
1187{
1188 pcmk_resource_t *previous = NULL; // Needed for ordered clones
1189
1191
1192 for (GList *iter = clone->priv->children;
1193 iter != NULL; iter = iter->next) {
1194
1195 pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1196
1197 // Demote before promote
1199 instance, PCMK_ACTION_PROMOTE,
1201
1202 order_instance_promotion(clone, instance, previous);
1203 order_instance_demotion(clone, instance, previous);
1204 previous = instance;
1205 }
1206}
1207
1217static void
1218update_dependent_allowed_nodes(pcmk_resource_t *dependent,
1219 const pcmk_resource_t *primary,
1220 const pcmk_node_t *primary_node,
1221 const pcmk__colocation_t *colocation)
1222{
1223 GHashTableIter iter;
1224 pcmk_node_t *node = NULL;
1225 const char *primary_value = NULL;
1226 const char *attr = colocation->node_attribute;
1227
1228 if (colocation->score >= PCMK_SCORE_INFINITY) {
1229 return; // Colocation is mandatory, so allowed node scores don't matter
1230 }
1231
1232 primary_value = pcmk__colocation_node_attr(primary_node, attr, primary);
1233
1234 pcmk__rsc_trace(colocation->primary,
1235 "Applying %s (%s with %s on %s by %s @%d) to %s",
1236 colocation->id, colocation->dependent->id,
1237 colocation->primary->id, pcmk__node_name(primary_node),
1238 attr, colocation->score, dependent->id);
1239
1240 g_hash_table_iter_init(&iter, dependent->priv->allowed_nodes);
1241 while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
1242 const char *dependent_value = pcmk__colocation_node_attr(node, attr,
1243 dependent);
1244
1245 if (pcmk__str_eq(primary_value, dependent_value, pcmk__str_casei)) {
1246 node->assign->score = pcmk__add_scores(node->assign->score,
1247 colocation->score);
1248 pcmk__rsc_trace(colocation->primary,
1249 "Added %s score (%s) to %s (now %s)",
1250 colocation->id,
1251 pcmk_readable_score(colocation->score),
1252 pcmk__node_name(node),
1253 pcmk_readable_score(node->assign->score));
1254 }
1255 }
1256}
1257
1265void
1267 pcmk_resource_t *dependent,
1268 const pcmk__colocation_t *colocation)
1269{
1270 GList *affected_nodes = NULL;
1271
1272 /* Build a list of all nodes where an instance of the primary will be, and
1273 * (for optional colocations) update the dependent's allowed node scores for
1274 * each one.
1275 */
1276 for (GList *iter = primary->priv->children;
1277 iter != NULL; iter = iter->next) {
1278
1279 pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1280 pcmk_node_t *node = NULL;
1281
1282 node = instance->priv->fns->location(instance, NULL,
1284 if (node == NULL) {
1285 continue;
1286 }
1287 if (instance->priv->fns->state(instance,
1288 false) == colocation->primary_role) {
1289 update_dependent_allowed_nodes(dependent, primary, node,
1290 colocation);
1291 affected_nodes = g_list_prepend(affected_nodes, node);
1292 }
1293 }
1294
1295 /* For mandatory colocations, add the primary's node score to the
1296 * dependent's node score for each affected node, and ban the dependent
1297 * from all other nodes.
1298 *
1299 * However, skip this for promoted-with-promoted colocations, otherwise
1300 * inactive dependent instances can't start (in the unpromoted role).
1301 */
1302 if ((colocation->score >= PCMK_SCORE_INFINITY)
1303 && ((colocation->dependent_role != pcmk_role_promoted)
1304 || (colocation->primary_role != pcmk_role_promoted))) {
1305
1306 pcmk__rsc_trace(colocation->primary,
1307 "Applying %s (mandatory %s with %s) to %s",
1308 colocation->id, colocation->dependent->id,
1309 colocation->primary->id, dependent->id);
1310 pcmk__colocation_intersect_nodes(dependent, primary, colocation,
1311 affected_nodes, true);
1312 }
1313 g_list_free(affected_nodes);
1314}
1315
1326int
1328 pcmk_resource_t *dependent,
1329 const pcmk__colocation_t *colocation)
1330{
1331 pcmk_resource_t *primary_instance = NULL;
1332
1333 // Look for a primary instance where dependent will be
1334 primary_instance = pcmk__find_compatible_instance(dependent, primary,
1335 colocation->primary_role,
1336 false);
1337
1338 if (primary_instance != NULL) {
1339 // Add primary instance's priority to dependent's
1340 int new_priority = pcmk__add_scores(dependent->priv->priority,
1341 colocation->score);
1342
1343 pcmk__rsc_trace(colocation->primary,
1344 "Applying %s (%s with %s) to %s priority "
1345 "(%s + %s = %s)",
1346 colocation->id, colocation->dependent->id,
1347 colocation->primary->id, dependent->id,
1348 pcmk_readable_score(dependent->priv->priority),
1349 pcmk_readable_score(colocation->score),
1350 pcmk_readable_score(new_priority));
1351 dependent->priv->priority = new_priority;
1352 return colocation->score;
1353 }
1354
1355 if (colocation->score >= PCMK_SCORE_INFINITY) {
1356 // Mandatory colocation, but primary won't be here
1357 pcmk__rsc_trace(colocation->primary,
1358 "Applying %s (%s with %s) to %s: can't be promoted",
1359 colocation->id, colocation->dependent->id,
1360 colocation->primary->id, dependent->id);
1361 dependent->priv->priority = -PCMK_SCORE_INFINITY;
1362 return -PCMK_SCORE_INFINITY;
1363 }
1364 return 0;
1365}
@ pcmk__ar_then_implies_first_graphed
If 'then' is required, 'first' must be added to the transition graph.
@ pcmk__ar_first_implies_then_graphed
If 'first' is required and runnable, 'then' must be in graph.
@ pcmk__ar_ordered
Actions are ordered (optionally, if no other flags are set)
#define PCMK_ACTION_PROMOTED
Definition actions.h:58
#define PCMK_ACTION_PROMOTE
Definition actions.h:57
#define PCMK_ACTION_DEMOTED
Definition actions.h:41
#define PCMK_ACTION_DEMOTE
Definition actions.h:40
@ pcmk__action_optional
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition attrs.c:114
const char * parent
Definition cib.c:27
const char * name
Definition cib.c:26
@ pcmk__clone_promotion_added
@ pcmk__clone_promotion_constrained
bool pcmk__is_daemon
Definition logging.c:47
uint64_t flags
Definition remote.c:3
char * pcmk_promotion_score_name(const char *rsc_id)
Return the name of the node attribute used as a promotion score.
Definition attrs.c:88
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition util.h:80
char data[0]
Definition cpg.c:10
@ pcmk__coloc_select_active
@ pcmk__coloc_select_nonnegative
G_GNUC_INTERNAL void pcmk__require_promotion_tickets(pcmk_resource_t *rsc)
G_GNUC_INTERNAL void pcmk__colocation_intersect_nodes(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, const GList *primary_nodes, bool merge_scores)
#define pcmk__order_resource_actions(first_rsc, first_task, then_rsc, then_task, flags)
G_GNUC_INTERNAL pcmk_resource_t * pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc, const pcmk_resource_t *rsc, enum rsc_role_e role, bool current)
G_GNUC_INTERNAL const char * pcmk__colocation_node_attr(const pcmk_node_t *node, const char *attr, const pcmk_resource_t *rsc)
G_GNUC_INTERNAL GList * pcmk__with_this_colocations(const pcmk_resource_t *rsc)
G_GNUC_INTERNAL gint pcmk__cmp_instance(gconstpointer a, gconstpointer b)
G_GNUC_INTERNAL bool pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, const pcmk_resource_t *rsc)
G_GNUC_INTERNAL GList * pcmk__this_with_colocations(const pcmk_resource_t *rsc)
G_GNUC_INTERNAL void pcmk__promotable_restart_ordering(pcmk_resource_t *rsc)
G_GNUC_INTERNAL bool pcmk__node_available(const pcmk_node_t *node, bool consider_score, bool consider_guest)
#define crm_warn(fmt, args...)
Definition logging.h:360
#define CRM_LOG_ASSERT(expr)
Definition logging.h:196
#define crm_notice(fmt, args...)
Definition logging.h:363
#define CRM_CHECK(expr, failure_action)
Definition logging.h:213
#define crm_err(fmt, args...)
Definition logging.h:357
#define pcmk__config_err(fmt...)
pcmk_scheduler_t * scheduler
#define PCMK_META_CONTAINER_ATTRIBUTE_TARGET
Definition options.h:86
const char * action
Definition pcmk_fence.c:32
const char * target
Definition pcmk_fence.c:31
void pcmk__add_promotion_scores(pcmk_resource_t *rsc)
void pcmk__order_promotable_instances(pcmk_resource_t *clone)
int pcmk__update_promotable_dependent_priority(const pcmk_resource_t *primary, pcmk_resource_t *dependent, const pcmk__colocation_t *colocation)
void pcmk__create_promotable_actions(pcmk_resource_t *clone)
void pcmk__set_instance_roles(pcmk_resource_t *rsc)
void pcmk__update_dependent_with_promotable(const pcmk_resource_t *primary, pcmk_resource_t *dependent, const pcmk__colocation_t *colocation)
Update dependent for a colocation with a promotable clone.
void pe__create_promotable_pseudo_ops(pcmk_resource_t *clone, bool any_promoting, bool any_demoting)
Definition clone.c:1105
int pe__clone_promoted_node_max(const pcmk_resource_t *clone)
Definition clone.c:107
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition complex.c:1025
#define pe__show_node_scores(level, rsc, text, nodes, scheduler)
Definition internal.h:158
bool pe__clone_is_ordered(const pcmk_resource_t *clone)
Definition clone.c:1043
int pe__set_clone_flag(pcmk_resource_t *clone, enum pcmk__clone_flags flag)
Definition clone.c:1062
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition complex.c:1247
char * clone_strip(const char *last_rsc_id)
Definition unpack.c:1954
int pe__clone_promoted_max(const pcmk_resource_t *clone)
Definition clone.c:90
@ pcmk_rsc_match_clone_only
Match only clones and their instances, by either clone or instance ID.
Definition resources.h:40
@ pcmk_rsc_match_current_node
If matching by node, compare current node instead of assigned node.
Definition resources.h:43
@ pcmk__rsc_managed
@ pcmk__rsc_unassigned
@ pcmk__rsc_unique
@ pcmk__rsc_updating_nodes
#define pcmk__set_rsc_flags(resource, flags_to_set)
@ pcmk__rsc_node_current
@ pcmk__rsc_node_assigned
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
@ pcmk_rc_ok
Definition results.h:159
@ pcmk_rc_already
Definition results.h:150
#define pcmk__assert(expr)
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition roles.c:23
rsc_role_e
Definition roles.h:34
@ pcmk_role_started
Started.
Definition roles.h:37
@ pcmk_role_unknown
Resource role is unknown.
Definition roles.h:35
@ pcmk_role_unpromoted
Unpromoted.
Definition roles.h:38
@ pcmk_role_promoted
Promoted.
Definition roles.h:39
@ pcmk_role_stopped
Stopped.
Definition roles.h:36
@ pcmk_no_quorum_freeze
Do not recover resources from outside partition.
Definition scheduler.h:39
#define pcmk__sched_err(scheduler, fmt...)
#define pcmk__rsc_info(rsc, fmt, args...)
#define pcmk__rsc_trace(rsc, fmt, args...)
#define pcmk__rsc_debug(rsc, fmt, args...)
@ pcmk__sched_quorate
@ pcmk__sched_output_scores
int pcmk_parse_score(const char *score_s, int *score, int default_score)
Parse an integer score from a string.
Definition scores.c:34
const char * pcmk_readable_score(int score)
Return a displayable static string for a score value.
Definition scores.c:102
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
Definition scores.h:26
int pcmk__add_scores(int score1, int score2)
Definition scores.c:159
pcmk_node_t * pe_find_node_id(const GList *node_list, const char *id)
Find a node by ID in a list of nodes.
Definition status.c:248
@ pcmk__str_none
@ pcmk__str_casei
void(* create_actions)(pcmk_resource_t *rsc)
int(* apply_coloc_score)(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent)
void(* add_colocated_node_scores)(pcmk_resource_t *source_rsc, const pcmk_resource_t *target_rsc, const char *log_id, GHashTable **nodes, const pcmk__colocation_t *colocation, float factor, uint32_t flags)
const char * node_attribute
pcmk_resource_t * primary
pcmk_resource_t * dependent
Location constraint object.
enum rsc_role_e role_filter
pcmk_scheduler_t * scheduler
This structure contains everything that makes up a single output formatter.
int(* message)(pcmk__output_t *out, const char *message_id,...)
pcmk_scheduler_t * scheduler
const pcmk__assignment_methods_t * cmds
const pcmk__rsc_methods_t * fns
unsigned long long flags
Definition resources.h:69
pcmk__resource_private_t * priv
Definition resources.h:61
pcmk_resource_t *(* find_rsc)(pcmk_resource_t *rsc, const char *search, const pcmk_node_t *node, uint32_t flags)
enum rsc_role_e(* state)(const pcmk_resource_t *rsc, bool current)
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, uint32_t target)
pcmk__scheduler_private_t * priv
Definition scheduler.h:99
enum pe_quorum_policy no_quorum_policy
Definition scheduler.h:93
uint64_t flags
Definition scheduler.h:89
pcmk__node_private_t * priv
Definition nodes.h:85
struct pcmk__node_assignment * assign
Definition nodes.h:79
Wrappers for and extensions to libxml2.